components/autofill/core/browser/autofill_regex_constants.cc - Issue 1518783002: Revert of autofill: switch autofill_regexes to RE2 library

Side by Side Diff: components/autofill/core/browser/autofill_regex_constants.cc

Issue 1518783002: Revert of autofill: switch autofill_regexes to RE2 library (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // This file contains UTF8 strings that we want as char arrays. To avoid	5 // This file contains UTF8 strings that we want as char arrays. To avoid

6 // different compilers, we use a script to convert the UTF8 strings into	6 // different compilers, we use a script to convert the UTF8 strings into

7 // numeric literals (\x##).	7 // numeric literals (\x##).

8	8

9 #include "components/autofill/core/browser/autofill_regex_constants.h"	9 #include "components/autofill/core/browser/autofill_regex_constants.h"

10	10

11 // This macro works around the fact that the RE2 library only supports ASCII

12 // word boundaries; it is intended to behave the same as \b.

13 #define WORDBREAK "(\\A|\\z|\\PL)"

14

15 namespace autofill {	11 namespace autofill {

16	12

17 /////////////////////////////////////////////////////////////////////////////	13 /////////////////////////////////////////////////////////////////////////////

18 // address_field.cc	14 // address_field.cc

19 /////////////////////////////////////////////////////////////////////////////	15 /////////////////////////////////////////////////////////////////////////////

20 const char kAttentionIgnoredRe[] = "attention\|attn";	16 const char kAttentionIgnoredRe[] = "attention\|attn";

21 const char kRegionIgnoredRe[] =	17 const char kRegionIgnoredRe[] =

22 "province\|region\|other"	18 "province\|region\|other"

23 "\|provincia" // es	19 "\|provincia" // es

24 "\|bairro\|suburb"; // pt-BR, pt-PT	20 "\|bairro\|suburb"; // pt-BR, pt-PT

(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
81 "\|país\|pais" // es	77 "\|país\|pais" // es

82 "\|国" // ja-JP	78 "\|国" // ja-JP

83 "\|国家" // zh-CN	79 "\|国家" // zh-CN

84 "\|국가\|나라"; // ko-KR	80 "\|국가\|나라"; // ko-KR

85 const char kCountryLocationRe[] =	81 const char kCountryLocationRe[] =

86 "location";	82 "location";

87 const char kZipCodeRe[] =	83 const char kZipCodeRe[] =

88 "zip\|postal\|post.*code\|pcode"	84 "zip\|postal\|post.*code\|pcode"

89 "\|pin.?code" // en-IN	85 "\|pin.?code" // en-IN

90 "\|postleitzahl" // de-DE	86 "\|postleitzahl" // de-DE

91 "\|" WORDBREAK "cp" WORDBREAK // es	87 "\|\\bcp\\b" // es

92 "\|" WORDBREAK "cdp" WORDBREAK // fr-FR	88 "\|\\bcdp\\b" // fr-FR

93 "\|" WORDBREAK "cap" WORDBREAK // it-IT	89 "\|\\bcap\\b" // it-IT

94 "\|郵便番号" // ja-JP	90 "\|郵便番号" // ja-JP

95 "\|codigo\|codpos\|" WORDBREAK "cep" WORDBREAK // pt-BR, pt-PT	91 "\|codigo\|codpos\|\\bcep\\b" // pt-BR, pt-PT

96 "\|Почтовый.?Индекс" // ru	92 "\|Почтовый.?Индекс" // ru

97 "\|邮政编码\|邮编" // zh-CN	93 "\|邮政编码\|邮编" // zh-CN

98 "\|郵遞區號" // zh-TW	94 "\|郵遞區號" // zh-TW

99 "\|우편.?번호"; // ko-KR	95 "\|우편.?번호"; // ko-KR

100 const char kZip4Re[] =	96 const char kZip4Re[] =

101 "zip\|^-$\|post2"	97 "zip\|^-$\|post2"

102 "\|codpos2"; // pt-BR, pt-PT	98 "\|codpos2"; // pt-BR, pt-PT

103 const char kCityRe[] =	99 const char kCityRe[] =

104 "city\|town"	100 "city\|town"

105 "\|" WORDBREAK "ort" WORDBREAK "\|stadt" // de-DE	101 "\|\\bort\\b\|stadt" // de-DE

106 "\|suburb" // en-AU	102 "\|suburb" // en-AU

107 "\|ciudad\|provincia\|localidad\|poblacion" // es	103 "\|ciudad\|provincia\|localidad\|poblacion" // es

108 "\|ville\|commune" // fr-FR	104 "\|ville\|commune" // fr-FR

109 "\|localita" // it-IT	105 "\|localita" // it-IT

110 "\|市区町村" // ja-JP	106 "\|市区町村" // ja-JP

111 "\|cidade" // pt-BR, pt-PT	107 "\|cidade" // pt-BR, pt-PT

112 "\|Город" // ru	108 "\|Город" // ru

113 "\|市" // zh-CN	109 "\|市" // zh-CN

114 "\|分區" // zh-TW	110 "\|分區" // zh-TW

115 "\|^시[^도·・]\|시[·・]?군[·・]?구"; // ko-KR	111 "\|^시[^도·・]\|시[·・]?군[·・]?구"; // ko-KR

116 const char kStateRe[] =	112 const char kStateRe[] =

117 "state\|county\|region\|province"	113 "(?<!united )state\|county\|region\|province"

118 "\|land" // de-DE	114 "\|land" // de-DE

119 "\|county\|principality" // en-UK	115 "\|county\|principality" // en-UK

120 "\|都道府県" // ja-JP	116 "\|都道府県" // ja-JP

121 "\|estado\|provincia" // pt-BR, pt-PT	117 "\|estado\|provincia" // pt-BR, pt-PT

122 "\|область" // ru	118 "\|область" // ru

123 "\|省" // zh-CN	119 "\|省" // zh-CN

124 "\|地區" // zh-TW	120 "\|地區" // zh-TW

125 "\|^시[·・]?도"; // ko-KR	121 "\|^시[·・]?도"; // ko-KR

126	122

127 /////////////////////////////////////////////////////////////////////////////	123 /////////////////////////////////////////////////////////////////////////////

128 // credit_card_field.cc	124 // credit_card_field.cc

129 /////////////////////////////////////////////////////////////////////////////	125 /////////////////////////////////////////////////////////////////////////////

130 const char kNameOnCardRe[] =	126 const char kNameOnCardRe[] =

131 "card.?(holder\|owner)\|name." WORDBREAK "on" WORDBREAK ".card"	127 "card.?(holder\|owner)\|name.\\bon\\b.card\|(card\|cc).?name\|cc.?full.?name"

132 "|(card|cc).?name|cc.?full.?name"

133 "\|karteninhaber" // de-DE	128 "\|karteninhaber" // de-DE

134 "\|nombre.*tarjeta" // es	129 "\|nombre.*tarjeta" // es

135 "\|nom.*carte" // fr-FR	130 "\|nom.*carte" // fr-FR

136 "\|nome.*cart" // it-IT	131 "\|nome.*cart" // it-IT

137 "\|名前" // ja-JP	132 "\|名前" // ja-JP

138 "\|Имя.*карты" // ru	133 "\|Имя.*карты" // ru

139 "\|信用卡开户名\|开户名\|持卡人姓名" // zh-CN	134 "\|信用卡开户名\|开户名\|持卡人姓名" // zh-CN

140 "\|持卡人姓名"; // zh-TW	135 "\|持卡人姓名"; // zh-TW

141 const char kNameOnCardContextualRe[] =	136 const char kNameOnCardContextualRe[] =

142 "name";	137 "name";

143 const char kCardNumberRe[] =	138 const char kCardNumberRe[] =

144 "(card\|cc\|acct).?(number\|#\|no\|num)"	139 "(card\|cc\|acct).?(number\|#\|no\|num)"

145 "\|nummer" // de-DE	140 "\|nummer" // de-DE

146 "\|credito\|numero\|número" // es	141 "\|credito\|numero\|número" // es

147 "\|numéro" // fr-FR	142 "\|numéro" // fr-FR

148 "\|カード番号" // ja-JP	143 "\|カード番号" // ja-JP

149 "\|Номер.*карты" // ru	144 "\|Номер.*карты" // ru

150 "\|信用卡号\|信用卡号码" // zh-CN	145 "\|信用卡号\|信用卡号码" // zh-CN

151 "\|信用卡卡號" // zh-TW	146 "\|信用卡卡號" // zh-TW

152 "\|카드"; // ko-KR	147 "\|카드"; // ko-KR

153 const char kCardCvcRe[] =	148 const char kCardCvcRe[] =

154 "verification\|card identification\|security code\|card code"	149 "verification\|card identification\|security code\|card code"

155 "\|cvn\|cvv\|cvc\|csc\|cvd\|cid\|ccv"	150 "\|cvn\|cvv\|cvc\|csc\|cvd\|cid\|ccv"

156 "\|" WORDBREAK "cid" WORDBREAK;	151 "\|\\bcid\\b";

157	152

158 // "Expiration date" is the most common label here, but some pages have	153 // "Expiration date" is the most common label here, but some pages have

159 // "Expires", "exp. date" or "exp. month" and "exp. year". We also look	154 // "Expires", "exp. date" or "exp. month" and "exp. year". We also look

160 // for the field names ccmonth and ccyear, which appear on at least 4 of	155 // for the field names ccmonth and ccyear, which appear on at least 4 of

161 // our test pages.	156 // our test pages.

162	157

163 // On at least one page (The China Shop2.html) we find only the labels	158 // On at least one page (The China Shop2.html) we find only the labels

164 // "month" and "year". So for now we match these words directly; we'll	159 // "month" and "year". So for now we match these words directly; we'll

165 // see if this turns out to be too general.	160 // see if this turns out to be too general.

166	161

(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
245 "\|^nome"; // pt-BR, pt-PT	240 "\|^nome"; // pt-BR, pt-PT

246 const char kFirstNameRe[] =	241 const char kFirstNameRe[] =

247 "first.name\|initials\|fname\|first$\|given.name"	242 "first.name\|initials\|fname\|first$\|given.name"

248 "\|vorname" // de-DE	243 "\|vorname" // de-DE

249 "\|nombre" // es	244 "\|nombre" // es

250 "\|forename\|prénom\|prenom" // fr-FR	245 "\|forename\|prénom\|prenom" // fr-FR

251 "\|名" // ja-JP	246 "\|名" // ja-JP

252 "\|nome" // pt-BR, pt-PT	247 "\|nome" // pt-BR, pt-PT

253 "\|Имя" // ru	248 "\|Имя" // ru

254 "\|이름"; // ko-KR	249 "\|이름"; // ko-KR

255 const char kMiddleInitialRe[] =	250 const char kMiddleInitialRe[] = "middle.*initial\|m\\.i\\.\|mi$\|\\bmi\\b";

256 "middle.*initial|m\\.i\\.|mi$|" WORDBREAK "mi" WORDBREAK;

257 const char kMiddleNameRe[] =	251 const char kMiddleNameRe[] =

258 "middle.*name\|mname\|middle$"	252 "middle.*name\|mname\|middle$"

259 "\|apellido.?materno\|lastlastname"; // es	253 "\|apellido.?materno\|lastlastname"; // es

260 const char kLastNameRe[] =	254 const char kLastNameRe[] =

261 "last.name\|lname\|surname\|last$\|secondname\|family.name"	255 "last.name\|lname\|surname\|last$\|secondname\|family.name"

262 "\|nachname" // de-DE	256 "\|nachname" // de-DE

263 "\|apellido" // es	257 "\|apellido" // es

264 "\|famille\|^nom" // fr-FR	258 "\|famille\|^nom" // fr-FR

265 "\|cognome" // it-IT	259 "\|cognome" // it-IT

266 "\|姓" // ja-JP	260 "\|姓" // ja-JP

(...skipping 25 matching lines...) Expand all Loading...
292 "^-$\|^\\)$";	286 "^-$\|^\\)$";

293 const char kPhoneSuffixSeparatorRe[] =	287 const char kPhoneSuffixSeparatorRe[] =

294 "^-$";	288 "^-$";

295 const char kPhonePrefixRe[] =	289 const char kPhonePrefixRe[] =

296 "prefix\|exchange"	290 "prefix\|exchange"

297 "\|preselection" // fr-FR	291 "\|preselection" // fr-FR

298 "\|ddd"; // pt-BR, pt-PT	292 "\|ddd"; // pt-BR, pt-PT

299 const char kPhoneSuffixRe[] =	293 const char kPhoneSuffixRe[] =

300 "suffix";	294 "suffix";

301 const char kPhoneExtensionRe[] =	295 const char kPhoneExtensionRe[] =

302 WORDBREAK "ext\|ext" WORDBREAK "\|extension"	296 "\\bext\|ext\\b\|extension"

303 "\|ramal"; // pt-BR, pt-PT	297 "\|ramal"; // pt-BR, pt-PT

304	298

305 } // namespace autofill	299 } // namespace autofill

306

307 #undef WORDBREAK

OLD	NEW

« no previous file with comments | « components/autofill/core/browser/address_field.cc ('k') | components/autofill/core/browser/credit_card.cc » ('j') | no next file with comments »