Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(433)

Side by Side Diff: components/autofill/core/browser/autofill_regex_constants.cc

Issue 1453193002: autofill: switch autofill_regexes to RE2 library (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: WORDBREAK macro Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // This file contains UTF8 strings that we want as char arrays. To avoid 5 // This file contains UTF8 strings that we want as char arrays. To avoid
6 // different compilers, we use a script to convert the UTF8 strings into 6 // different compilers, we use a script to convert the UTF8 strings into
7 // numeric literals (\x##). 7 // numeric literals (\x##).
8 8
9 #include "components/autofill/core/browser/autofill_regex_constants.h" 9 #include "components/autofill/core/browser/autofill_regex_constants.h"
10 10
11 #define WORDBREAK "(\\A|\\z|\\PL)"
Evan Stade 2015/12/07 23:56:10 could you add a comment explaining this is suppose…
tfarina 2015/12/08 12:46:28 Done.
12
11 namespace autofill { 13 namespace autofill {
12 14
13 ///////////////////////////////////////////////////////////////////////////// 15 /////////////////////////////////////////////////////////////////////////////
14 // address_field.cc 16 // address_field.cc
15 ///////////////////////////////////////////////////////////////////////////// 17 /////////////////////////////////////////////////////////////////////////////
16 const char kAttentionIgnoredRe[] = "attention|attn"; 18 const char kAttentionIgnoredRe[] = "attention|attn";
17 const char kRegionIgnoredRe[] = 19 const char kRegionIgnoredRe[] =
18 "province|region|other" 20 "province|region|other"
19 "|provincia" // es 21 "|provincia" // es
20 "|bairro|suburb"; // pt-BR, pt-PT 22 "|bairro|suburb"; // pt-BR, pt-PT
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 "|país|pais" // es 79 "|país|pais" // es
78 "|国" // ja-JP 80 "|国" // ja-JP
79 "|国家" // zh-CN 81 "|国家" // zh-CN
80 "|국가|나라"; // ko-KR 82 "|국가|나라"; // ko-KR
81 const char kCountryLocationRe[] = 83 const char kCountryLocationRe[] =
82 "location"; 84 "location";
83 const char kZipCodeRe[] = 85 const char kZipCodeRe[] =
84 "zip|postal|post.*code|pcode" 86 "zip|postal|post.*code|pcode"
85 "|pin.?code" // en-IN 87 "|pin.?code" // en-IN
86 "|postleitzahl" // de-DE 88 "|postleitzahl" // de-DE
87 "|\\bcp\\b" // es 89 "|" WORDBREAK "cp" WORDBREAK // es
88 "|\\bcdp\\b" // fr-FR 90 "|" WORDBREAK "cdp" WORDBREAK // fr-FR
89 "|\\bcap\\b" // it-IT 91 "|" WORDBREAK "cap" WORDBREAK // it-IT
90 "|郵便番号" // ja-JP 92 "|郵便番号" // ja-JP
91 "|codigo|codpos|\\bcep\\b" // pt-BR, pt-PT 93 "|codigo|codpos|" WORDBREAK "cep" WORDBREAK // pt-BR, pt-PT
92 "|Почтовый.?Индекс" // ru 94 "|Почтовый.?Индекс" // ru
93 "|邮政编码|邮编" // zh-CN 95 "|邮政编码|邮编" // zh-CN
94 "|郵遞區號" // zh-TW 96 "|郵遞區號" // zh-TW
95 "|우편.?번호"; // ko-KR 97 "|우편.?번호"; // ko-KR
96 const char kZip4Re[] = 98 const char kZip4Re[] =
97 "zip|^-$|post2" 99 "zip|^-$|post2"
98 "|codpos2"; // pt-BR, pt-PT 100 "|codpos2"; // pt-BR, pt-PT
99 const char kCityRe[] = 101 const char kCityRe[] =
100 "city|town" 102 "city|town"
101 "|\\bort\\b|stadt" // de-DE 103 "|" WORDBREAK "ort" WORDBREAK "|stadt" // de-DE
102 "|suburb" // en-AU 104 "|suburb" // en-AU
103 "|ciudad|provincia|localidad|poblacion" // es 105 "|ciudad|provincia|localidad|poblacion" // es
104 "|ville|commune" // fr-FR 106 "|ville|commune" // fr-FR
105 "|localita" // it-IT 107 "|localita" // it-IT
106 "|市区町村" // ja-JP 108 "|市区町村" // ja-JP
107 "|cidade" // pt-BR, pt-PT 109 "|cidade" // pt-BR, pt-PT
108 "|Город" // ru 110 "|Город" // ru
109 "|市" // zh-CN 111 "|市" // zh-CN
110 "|分區" // zh-TW 112 "|分區" // zh-TW
111 "|^시[^도·・]|시[·・]?군[·・]?구"; // ko-KR 113 "|^시[^도·・]|시[·・]?군[·・]?구"; // ko-KR
112 const char kStateRe[] = 114 const char kStateRe[] =
113 "(?<!united )state|county|region|province" 115 "state|county|region|province"
114 "|land" // de-DE 116 "|land" // de-DE
115 "|county|principality" // en-UK 117 "|county|principality" // en-UK
116 "|都道府県" // ja-JP 118 "|都道府県" // ja-JP
117 "|estado|provincia" // pt-BR, pt-PT 119 "|estado|provincia" // pt-BR, pt-PT
118 "|область" // ru 120 "|область" // ru
119 "|省" // zh-CN 121 "|省" // zh-CN
120 "|地區" // zh-TW 122 "|地區" // zh-TW
121 "|^시[·・]?도"; // ko-KR 123 "|^시[·・]?도"; // ko-KR
122 124
123 ///////////////////////////////////////////////////////////////////////////// 125 /////////////////////////////////////////////////////////////////////////////
124 // credit_card_field.cc 126 // credit_card_field.cc
125 ///////////////////////////////////////////////////////////////////////////// 127 /////////////////////////////////////////////////////////////////////////////
126 const char kNameOnCardRe[] = 128 const char kNameOnCardRe[] =
127 "card.?(holder|owner)|name.*\\bon\\b.*card|(card|cc).?name|cc.?full.?name" 129 "card.?(holder|owner)|name.*" WORDBREAK "on" WORDBREAK
128 "|karteninhaber" // de-DE 130 ".*card|(card|cc).?name|cc.?full.?name"
Ilya Sherman 2015/12/07 23:49:25 nit: Please wrap this as "card.?(holder|owner…
tfarina 2015/12/08 12:46:28 Done.
129 "|nombre.*tarjeta" // es 131 "|karteninhaber" // de-DE
130 "|nom.*carte" // fr-FR 132 "|nombre.*tarjeta" // es
131 "|nome.*cart" // it-IT 133 "|nom.*carte" // fr-FR
132 "|名前" // ja-JP 134 "|nome.*cart" // it-IT
133 "|Имя.*карты" // ru 135 "|名前" // ja-JP
136 "|Имя.*карты" // ru
134 "|信用卡开户名|开户名|持卡人姓名" // zh-CN 137 "|信用卡开户名|开户名|持卡人姓名" // zh-CN
135 "|持卡人姓名"; // zh-TW 138 "|持卡人姓名"; // zh-TW
Ilya Sherman 2015/12/07 23:49:25 nit: Please revert these alignment changes.
tfarina 2015/12/08 12:46:28 Sorry, that was clang format. Reverted.
136 const char kNameOnCardContextualRe[] = 139 const char kNameOnCardContextualRe[] =
137 "name"; 140 "name";
138 const char kCardNumberRe[] = 141 const char kCardNumberRe[] =
139 "(card|cc|acct).?(number|#|no|num)" 142 "(card|cc|acct).?(number|#|no|num)"
140 "|nummer" // de-DE 143 "|nummer" // de-DE
141 "|credito|numero|número" // es 144 "|credito|numero|número" // es
142 "|numéro" // fr-FR 145 "|numéro" // fr-FR
143 "|カード番号" // ja-JP 146 "|カード番号" // ja-JP
144 "|Номер.*карты" // ru 147 "|Номер.*карты" // ru
145 "|信用卡号|信用卡号码" // zh-CN 148 "|信用卡号|信用卡号码" // zh-CN
146 "|信用卡卡號" // zh-TW 149 "|信用卡卡號" // zh-TW
147 "|카드"; // ko-KR 150 "|카드"; // ko-KR
148 const char kCardCvcRe[] = 151 const char kCardCvcRe[] =
149 "verification|card identification|security code|card code" 152 "verification|card identification|security code|card code"
150 "|cvn|cvv|cvc|csc|cvd|cid|ccv" 153 "|cvn|cvv|cvc|csc|cvd|cid|ccv"
151 "|\\bcid\\b"; 154 "|" WORDBREAK "cid" WORDBREAK;
152 155
153 // "Expiration date" is the most common label here, but some pages have 156 // "Expiration date" is the most common label here, but some pages have
154 // "Expires", "exp. date" or "exp. month" and "exp. year". We also look 157 // "Expires", "exp. date" or "exp. month" and "exp. year". We also look
155 // for the field names ccmonth and ccyear, which appear on at least 4 of 158 // for the field names ccmonth and ccyear, which appear on at least 4 of
156 // our test pages. 159 // our test pages.
157 160
158 // On at least one page (The China Shop2.html) we find only the labels 161 // On at least one page (The China Shop2.html) we find only the labels
159 // "month" and "year". So for now we match these words directly; we'll 162 // "month" and "year". So for now we match these words directly; we'll
160 // see if this turns out to be too general. 163 // see if this turns out to be too general.
161 164
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
240 "|^nome"; // pt-BR, pt-PT 243 "|^nome"; // pt-BR, pt-PT
241 const char kFirstNameRe[] = 244 const char kFirstNameRe[] =
242 "first.*name|initials|fname|first$|given.*name" 245 "first.*name|initials|fname|first$|given.*name"
243 "|vorname" // de-DE 246 "|vorname" // de-DE
244 "|nombre" // es 247 "|nombre" // es
245 "|forename|prénom|prenom" // fr-FR 248 "|forename|prénom|prenom" // fr-FR
246 "|名" // ja-JP 249 "|名" // ja-JP
247 "|nome" // pt-BR, pt-PT 250 "|nome" // pt-BR, pt-PT
248 "|Имя" // ru 251 "|Имя" // ru
249 "|이름"; // ko-KR 252 "|이름"; // ko-KR
250 const char kMiddleInitialRe[] = "middle.*initial|m\\.i\\.|mi$|\\bmi\\b"; 253 const char kMiddleInitialRe[] =
254 "middle.*initial|m\\.i\\.|mi$|" WORDBREAK "mi" WORDBREAK;
251 const char kMiddleNameRe[] = 255 const char kMiddleNameRe[] =
252 "middle.*name|mname|middle$" 256 "middle.*name|mname|middle$"
253 "|apellido.?materno|lastlastname"; // es 257 "|apellido.?materno|lastlastname"; // es
254 const char kLastNameRe[] = 258 const char kLastNameRe[] =
255 "last.*name|lname|surname|last$|secondname|family.*name" 259 "last.*name|lname|surname|last$|secondname|family.*name"
256 "|nachname" // de-DE 260 "|nachname" // de-DE
257 "|apellido" // es 261 "|apellido" // es
258 "|famille|^nom" // fr-FR 262 "|famille|^nom" // fr-FR
259 "|cognome" // it-IT 263 "|cognome" // it-IT
260 "|姓" // ja-JP 264 "|姓" // ja-JP
(...skipping 25 matching lines...) Expand all
286 "^-$|^\\)$"; 290 "^-$|^\\)$";
287 const char kPhoneSuffixSeparatorRe[] = 291 const char kPhoneSuffixSeparatorRe[] =
288 "^-$"; 292 "^-$";
289 const char kPhonePrefixRe[] = 293 const char kPhonePrefixRe[] =
290 "prefix|exchange" 294 "prefix|exchange"
291 "|preselection" // fr-FR 295 "|preselection" // fr-FR
292 "|ddd"; // pt-BR, pt-PT 296 "|ddd"; // pt-BR, pt-PT
293 const char kPhoneSuffixRe[] = 297 const char kPhoneSuffixRe[] =
294 "suffix"; 298 "suffix";
295 const char kPhoneExtensionRe[] = 299 const char kPhoneExtensionRe[] =
296 "\\bext|ext\\b|extension" 300 WORDBREAK "ext|ext" WORDBREAK "|extension"
297 "|ramal"; // pt-BR, pt-PT 301 "|ramal"; // pt-BR, pt-PT
298 302
299 } // namespace autofill 303 } // namespace autofill
OLD | NEW
« no previous file with comments | « components/autofill/core/browser/address_field.cc ('k') | components/autofill/core/browser/credit_card.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698