Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1436)

Side by Side Diff: components/autofill/core/browser/autofill_regex_constants.cc

Issue 1518783002: Revert of autofill: switch autofill_regexes to RE2 library (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // This file contains UTF8 strings that we want as char arrays. To avoid 5 // This file contains UTF8 strings that we want as char arrays. To avoid
6 // different compilers, we use a script to convert the UTF8 strings into 6 // different compilers, we use a script to convert the UTF8 strings into
7 // numeric literals (\x##). 7 // numeric literals (\x##).
8 8
9 #include "components/autofill/core/browser/autofill_regex_constants.h" 9 #include "components/autofill/core/browser/autofill_regex_constants.h"
10 10
11 // This macro is to workaround the fact that RE2 library only supports ASCII
12 // word boundaries and it is supposed to be the same as \b.
13 #define WORDBREAK "(\\A|\\z|\\PL)"
14
15 namespace autofill { 11 namespace autofill {
16 12
17 ///////////////////////////////////////////////////////////////////////////// 13 /////////////////////////////////////////////////////////////////////////////
18 // address_field.cc 14 // address_field.cc
19 ///////////////////////////////////////////////////////////////////////////// 15 /////////////////////////////////////////////////////////////////////////////
20 const char kAttentionIgnoredRe[] = "attention|attn"; 16 const char kAttentionIgnoredRe[] = "attention|attn";
21 const char kRegionIgnoredRe[] = 17 const char kRegionIgnoredRe[] =
22 "province|region|other" 18 "province|region|other"
23 "|provincia" // es 19 "|provincia" // es
24 "|bairro|suburb"; // pt-BR, pt-PT 20 "|bairro|suburb"; // pt-BR, pt-PT
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
81 "|país|pais" // es 77 "|país|pais" // es
82 "|国" // ja-JP 78 "|国" // ja-JP
83 "|国家" // zh-CN 79 "|国家" // zh-CN
84 "|국가|나라"; // ko-KR 80 "|국가|나라"; // ko-KR
85 const char kCountryLocationRe[] = 81 const char kCountryLocationRe[] =
86 "location"; 82 "location";
87 const char kZipCodeRe[] = 83 const char kZipCodeRe[] =
88 "zip|postal|post.*code|pcode" 84 "zip|postal|post.*code|pcode"
89 "|pin.?code" // en-IN 85 "|pin.?code" // en-IN
90 "|postleitzahl" // de-DE 86 "|postleitzahl" // de-DE
91 "|" WORDBREAK "cp" WORDBREAK // es 87 "|\\bcp\\b" // es
92 "|" WORDBREAK "cdp" WORDBREAK // fr-FR 88 "|\\bcdp\\b" // fr-FR
93 "|" WORDBREAK "cap" WORDBREAK // it-IT 89 "|\\bcap\\b" // it-IT
94 "|郵便番号" // ja-JP 90 "|郵便番号" // ja-JP
95 "|codigo|codpos|" WORDBREAK "cep" WORDBREAK // pt-BR, pt-PT 91 "|codigo|codpos|\\bcep\\b" // pt-BR, pt-PT
96 "|Почтовый.?Индекс" // ru 92 "|Почтовый.?Индекс" // ru
97 "|邮政编码|邮编" // zh-CN 93 "|邮政编码|邮编" // zh-CN
98 "|郵遞區號" // zh-TW 94 "|郵遞區號" // zh-TW
99 "|우편.?번호"; // ko-KR 95 "|우편.?번호"; // ko-KR
100 const char kZip4Re[] = 96 const char kZip4Re[] =
101 "zip|^-$|post2" 97 "zip|^-$|post2"
102 "|codpos2"; // pt-BR, pt-PT 98 "|codpos2"; // pt-BR, pt-PT
103 const char kCityRe[] = 99 const char kCityRe[] =
104 "city|town" 100 "city|town"
105 "|" WORDBREAK "ort" WORDBREAK "|stadt" // de-DE 101 "|\\bort\\b|stadt" // de-DE
106 "|suburb" // en-AU 102 "|suburb" // en-AU
107 "|ciudad|provincia|localidad|poblacion" // es 103 "|ciudad|provincia|localidad|poblacion" // es
108 "|ville|commune" // fr-FR 104 "|ville|commune" // fr-FR
109 "|localita" // it-IT 105 "|localita" // it-IT
110 "|市区町村" // ja-JP 106 "|市区町村" // ja-JP
111 "|cidade" // pt-BR, pt-PT 107 "|cidade" // pt-BR, pt-PT
112 "|Город" // ru 108 "|Город" // ru
113 "|市" // zh-CN 109 "|市" // zh-CN
114 "|分區" // zh-TW 110 "|分區" // zh-TW
115 "|^시[^도·・]|시[·・]?군[·・]?구"; // ko-KR 111 "|^시[^도·・]|시[·・]?군[·・]?구"; // ko-KR
116 const char kStateRe[] = 112 const char kStateRe[] =
117 "state|county|region|province" 113 "(?<!united )state|county|region|province"
118 "|land" // de-DE 114 "|land" // de-DE
119 "|county|principality" // en-UK 115 "|county|principality" // en-UK
120 "|都道府県" // ja-JP 116 "|都道府県" // ja-JP
121 "|estado|provincia" // pt-BR, pt-PT 117 "|estado|provincia" // pt-BR, pt-PT
122 "|область" // ru 118 "|область" // ru
123 "|省" // zh-CN 119 "|省" // zh-CN
124 "|地區" // zh-TW 120 "|地區" // zh-TW
125 "|^시[·・]?도"; // ko-KR 121 "|^시[·・]?도"; // ko-KR
126 122
127 ///////////////////////////////////////////////////////////////////////////// 123 /////////////////////////////////////////////////////////////////////////////
128 // credit_card_field.cc 124 // credit_card_field.cc
129 ///////////////////////////////////////////////////////////////////////////// 125 /////////////////////////////////////////////////////////////////////////////
130 const char kNameOnCardRe[] = 126 const char kNameOnCardRe[] =
131 "card.?(holder|owner)|name.*" WORDBREAK "on" WORDBREAK ".*card" 127 "card.?(holder|owner)|name.*\\bon\\b.*card|(card|cc).?name|cc.?full.?name"
132 "|(card|cc).?name|cc.?full.?name"
133 "|karteninhaber" // de-DE 128 "|karteninhaber" // de-DE
134 "|nombre.*tarjeta" // es 129 "|nombre.*tarjeta" // es
135 "|nom.*carte" // fr-FR 130 "|nom.*carte" // fr-FR
136 "|nome.*cart" // it-IT 131 "|nome.*cart" // it-IT
137 "|名前" // ja-JP 132 "|名前" // ja-JP
138 "|Имя.*карты" // ru 133 "|Имя.*карты" // ru
139 "|信用卡开户名|开户名|持卡人姓名" // zh-CN 134 "|信用卡开户名|开户名|持卡人姓名" // zh-CN
140 "|持卡人姓名"; // zh-TW 135 "|持卡人姓名"; // zh-TW
141 const char kNameOnCardContextualRe[] = 136 const char kNameOnCardContextualRe[] =
142 "name"; 137 "name";
143 const char kCardNumberRe[] = 138 const char kCardNumberRe[] =
144 "(card|cc|acct).?(number|#|no|num)" 139 "(card|cc|acct).?(number|#|no|num)"
145 "|nummer" // de-DE 140 "|nummer" // de-DE
146 "|credito|numero|número" // es 141 "|credito|numero|número" // es
147 "|numéro" // fr-FR 142 "|numéro" // fr-FR
148 "|カード番号" // ja-JP 143 "|カード番号" // ja-JP
149 "|Номер.*карты" // ru 144 "|Номер.*карты" // ru
150 "|信用卡号|信用卡号码" // zh-CN 145 "|信用卡号|信用卡号码" // zh-CN
151 "|信用卡卡號" // zh-TW 146 "|信用卡卡號" // zh-TW
152 "|카드"; // ko-KR 147 "|카드"; // ko-KR
153 const char kCardCvcRe[] = 148 const char kCardCvcRe[] =
154 "verification|card identification|security code|card code" 149 "verification|card identification|security code|card code"
155 "|cvn|cvv|cvc|csc|cvd|cid|ccv" 150 "|cvn|cvv|cvc|csc|cvd|cid|ccv"
156 "|" WORDBREAK "cid" WORDBREAK; 151 "|\\bcid\\b";
157 152
158 // "Expiration date" is the most common label here, but some pages have 153 // "Expiration date" is the most common label here, but some pages have
159 // "Expires", "exp. date" or "exp. month" and "exp. year". We also look 154 // "Expires", "exp. date" or "exp. month" and "exp. year". We also look
160 // for the field names ccmonth and ccyear, which appear on at least 4 of 155 // for the field names ccmonth and ccyear, which appear on at least 4 of
161 // our test pages. 156 // our test pages.
162 157
163 // On at least one page (The China Shop2.html) we find only the labels 158 // On at least one page (The China Shop2.html) we find only the labels
164 // "month" and "year". So for now we match these words directly; we'll 159 // "month" and "year". So for now we match these words directly; we'll
165 // see if this turns out to be too general. 160 // see if this turns out to be too general.
166 161
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
245 "|^nome"; // pt-BR, pt-PT 240 "|^nome"; // pt-BR, pt-PT
246 const char kFirstNameRe[] = 241 const char kFirstNameRe[] =
247 "first.*name|initials|fname|first$|given.*name" 242 "first.*name|initials|fname|first$|given.*name"
248 "|vorname" // de-DE 243 "|vorname" // de-DE
249 "|nombre" // es 244 "|nombre" // es
250 "|forename|prénom|prenom" // fr-FR 245 "|forename|prénom|prenom" // fr-FR
251 "|名" // ja-JP 246 "|名" // ja-JP
252 "|nome" // pt-BR, pt-PT 247 "|nome" // pt-BR, pt-PT
253 "|Имя" // ru 248 "|Имя" // ru
254 "|이름"; // ko-KR 249 "|이름"; // ko-KR
255 const char kMiddleInitialRe[] = 250 const char kMiddleInitialRe[] = "middle.*initial|m\\.i\\.|mi$|\\bmi\\b";
256 "middle.*initial|m\\.i\\.|mi$|" WORDBREAK "mi" WORDBREAK;
257 const char kMiddleNameRe[] = 251 const char kMiddleNameRe[] =
258 "middle.*name|mname|middle$" 252 "middle.*name|mname|middle$"
259 "|apellido.?materno|lastlastname"; // es 253 "|apellido.?materno|lastlastname"; // es
260 const char kLastNameRe[] = 254 const char kLastNameRe[] =
261 "last.*name|lname|surname|last$|secondname|family.*name" 255 "last.*name|lname|surname|last$|secondname|family.*name"
262 "|nachname" // de-DE 256 "|nachname" // de-DE
263 "|apellido" // es 257 "|apellido" // es
264 "|famille|^nom" // fr-FR 258 "|famille|^nom" // fr-FR
265 "|cognome" // it-IT 259 "|cognome" // it-IT
266 "|姓" // ja-JP 260 "|姓" // ja-JP
(...skipping 25 matching lines...) Expand all
292 "^-$|^\\)$"; 286 "^-$|^\\)$";
293 const char kPhoneSuffixSeparatorRe[] = 287 const char kPhoneSuffixSeparatorRe[] =
294 "^-$"; 288 "^-$";
295 const char kPhonePrefixRe[] = 289 const char kPhonePrefixRe[] =
296 "prefix|exchange" 290 "prefix|exchange"
297 "|preselection" // fr-FR 291 "|preselection" // fr-FR
298 "|ddd"; // pt-BR, pt-PT 292 "|ddd"; // pt-BR, pt-PT
299 const char kPhoneSuffixRe[] = 293 const char kPhoneSuffixRe[] =
300 "suffix"; 294 "suffix";
301 const char kPhoneExtensionRe[] = 295 const char kPhoneExtensionRe[] =
302 WORDBREAK "ext|ext" WORDBREAK "|extension" 296 "\\bext|ext\\b|extension"
303 "|ramal"; // pt-BR, pt-PT 297 "|ramal"; // pt-BR, pt-PT
304 298
305 } // namespace autofill 299 } // namespace autofill
306
307 #undef WORDBREAK
OLD | NEW
« no previous file with comments | « components/autofill/core/browser/address_field.cc ('k') | components/autofill/core/browser/credit_card.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698