components/autofill/core/browser/autofill_regex_constants.cc - Issue 1453193002: autofill: switch autofill_regexes to RE2 library

Side by Side Diff: components/autofill/core/browser/autofill_regex_constants.cc

Issue 1453193002: autofill: switch autofill_regexes to RE2 library (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: address reviews Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« components/autofill/core/browser/address_field.cc ('K') | « components/autofill/core/browser/address_field.cc ('k') | components/autofill/core/browser/credit_card.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // This file contains UTF8 strings that we want as char arrays. To avoid	5 // This file contains UTF8 strings that we want as char arrays. To avoid

6 // differences between compilers, a script converts the UTF8 strings into	6 // differences between compilers, a script converts the UTF8 strings into

7 // numeric literals (\x##).	7 // numeric literals (\x##).

8	8

9 #include "components/autofill/core/browser/autofill_regex_constants.h"	9 #include "components/autofill/core/browser/autofill_regex_constants.h"

10	10

	11 // This macro works around the fact that the RE2 library only supports ASCII

	12 // word boundaries; it is intended to behave the same as \b.

	13 #define WORDBREAK "(\\A\|\\z\|\\PL)"

	14

11 namespace autofill {	15 namespace autofill {

12	16

13 /////////////////////////////////////////////////////////////////////////////	17 /////////////////////////////////////////////////////////////////////////////

14 // address_field.cc	18 // address_field.cc

15 /////////////////////////////////////////////////////////////////////////////	19 /////////////////////////////////////////////////////////////////////////////

16 const char kAttentionIgnoredRe[] = "attention\|attn";	20 const char kAttentionIgnoredRe[] = "attention\|attn";

17 const char kRegionIgnoredRe[] =	21 const char kRegionIgnoredRe[] =

18 "province\|region\|other"	22 "province\|region\|other"

19 "\|provincia" // es	23 "\|provincia" // es

20 "\|bairro\|suburb"; // pt-BR, pt-PT	24 "\|bairro\|suburb"; // pt-BR, pt-PT

(...skipping 56 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
77 "\|país\|pais" // es	81 "\|país\|pais" // es

78 "\|国" // ja-JP	82 "\|国" // ja-JP

79 "\|国家" // zh-CN	83 "\|国家" // zh-CN

80 "\|국가\|나라"; // ko-KR	84 "\|국가\|나라"; // ko-KR

81 const char kCountryLocationRe[] =	85 const char kCountryLocationRe[] =

82 "location";	86 "location";

83 const char kZipCodeRe[] =	87 const char kZipCodeRe[] =

84 "zip\|postal\|post.*code\|pcode"	88 "zip\|postal\|post.*code\|pcode"

85 "\|pin.?code" // en-IN	89 "\|pin.?code" // en-IN

86 "\|postleitzahl" // de-DE	90 "\|postleitzahl" // de-DE

87 "\|\\bcp\\b" // es	91 "\|" WORDBREAK "cp" WORDBREAK // es

88 "\|\\bcdp\\b" // fr-FR	92 "\|" WORDBREAK "cdp" WORDBREAK // fr-FR

89 "\|\\bcap\\b" // it-IT	93 "\|" WORDBREAK "cap" WORDBREAK // it-IT

90 "\|郵便番号" // ja-JP	94 "\|郵便番号" // ja-JP

91 "\|codigo\|codpos\|\\bcep\\b" // pt-BR, pt-PT	95 "\|codigo\|codpos\|" WORDBREAK "cep" WORDBREAK // pt-BR, pt-PT

92 "\|Почтовый.?Индекс" // ru	96 "\|Почтовый.?Индекс" // ru

93 "\|邮政编码\|邮编" // zh-CN	97 "\|邮政编码\|邮编" // zh-CN

94 "\|郵遞區號" // zh-TW	98 "\|郵遞區號" // zh-TW

95 "\|우편.?번호"; // ko-KR	99 "\|우편.?번호"; // ko-KR

96 const char kZip4Re[] =	100 const char kZip4Re[] =

97 "zip\|^-$\|post2"	101 "zip\|^-$\|post2"

98 "\|codpos2"; // pt-BR, pt-PT	102 "\|codpos2"; // pt-BR, pt-PT

99 const char kCityRe[] =	103 const char kCityRe[] =

100 "city\|town"	104 "city\|town"

101 "\|\\bort\\b\|stadt" // de-DE	105 "\|" WORDBREAK "ort" WORDBREAK "\|stadt" // de-DE

102 "\|suburb" // en-AU	106 "\|suburb" // en-AU

103 "\|ciudad\|provincia\|localidad\|poblacion" // es	107 "\|ciudad\|provincia\|localidad\|poblacion" // es

104 "\|ville\|commune" // fr-FR	108 "\|ville\|commune" // fr-FR

105 "\|localita" // it-IT	109 "\|localita" // it-IT

106 "\|市区町村" // ja-JP	110 "\|市区町村" // ja-JP

107 "\|cidade" // pt-BR, pt-PT	111 "\|cidade" // pt-BR, pt-PT

108 "\|Город" // ru	112 "\|Город" // ru

109 "\|市" // zh-CN	113 "\|市" // zh-CN

110 "\|分區" // zh-TW	114 "\|分區" // zh-TW

111 "\|^시[^도·・]\|시[·・]?군[·・]?구"; // ko-KR	115 "\|^시[^도·・]\|시[·・]?군[·・]?구"; // ko-KR

112 const char kStateRe[] =	116 const char kStateRe[] =

113 "(?<!united )state\|county\|region\|province"	117 "state\|county\|region\|province"

114 "\|land" // de-DE	118 "\|land" // de-DE

115 "\|county\|principality" // en-UK	119 "\|county\|principality" // en-UK

116 "\|都道府県" // ja-JP	120 "\|都道府県" // ja-JP

117 "\|estado\|provincia" // pt-BR, pt-PT	121 "\|estado\|provincia" // pt-BR, pt-PT

118 "\|область" // ru	122 "\|область" // ru

119 "\|省" // zh-CN	123 "\|省" // zh-CN

120 "\|地區" // zh-TW	124 "\|地區" // zh-TW

121 "\|^시[·・]?도"; // ko-KR	125 "\|^시[·・]?도"; // ko-KR

122	126

123 /////////////////////////////////////////////////////////////////////////////	127 /////////////////////////////////////////////////////////////////////////////

124 // credit_card_field.cc	128 // credit_card_field.cc

125 /////////////////////////////////////////////////////////////////////////////	129 /////////////////////////////////////////////////////////////////////////////

126 const char kNameOnCardRe[] =	130 const char kNameOnCardRe[] =

127 "card.?(holder\|owner)\|name.\\bon\\b.card\|(card\|cc).?name\|cc.?full.?name"	131 "card.?(holder\|owner)\|name." WORDBREAK "on" WORDBREAK ".card"

	132 "\|(card\|cc).?name\|cc.?full.?name"

128 "\|karteninhaber" // de-DE	133 "\|karteninhaber" // de-DE

129 "\|nombre.*tarjeta" // es	134 "\|nombre.*tarjeta" // es

130 "\|nom.*carte" // fr-FR	135 "\|nom.*carte" // fr-FR

131 "\|nome.*cart" // it-IT	136 "\|nome.*cart" // it-IT

132 "\|名前" // ja-JP	137 "\|名前" // ja-JP

133 "\|Имя.*карты" // ru	138 "\|Имя.*карты" // ru

134 "\|信用卡开户名\|开户名\|持卡人姓名" // zh-CN	139 "\|信用卡开户名\|开户名\|持卡人姓名" // zh-CN

135 "\|持卡人姓名"; // zh-TW	140 "\|持卡人姓名"; // zh-TW

136 const char kNameOnCardContextualRe[] =	141 const char kNameOnCardContextualRe[] =

137 "name";	142 "name";

138 const char kCardNumberRe[] =	143 const char kCardNumberRe[] =

139 "(card\|cc\|acct).?(number\|#\|no\|num)"	144 "(card\|cc\|acct).?(number\|#\|no\|num)"

140 "\|nummer" // de-DE	145 "\|nummer" // de-DE

141 "\|credito\|numero\|número" // es	146 "\|credito\|numero\|número" // es

142 "\|numéro" // fr-FR	147 "\|numéro" // fr-FR

143 "\|カード番号" // ja-JP	148 "\|カード番号" // ja-JP

144 "\|Номер.*карты" // ru	149 "\|Номер.*карты" // ru

145 "\|信用卡号\|信用卡号码" // zh-CN	150 "\|信用卡号\|信用卡号码" // zh-CN

146 "\|信用卡卡號" // zh-TW	151 "\|信用卡卡號" // zh-TW

147 "\|카드"; // ko-KR	152 "\|카드"; // ko-KR

148 const char kCardCvcRe[] =	153 const char kCardCvcRe[] =

149 "verification\|card identification\|security code\|card code"	154 "verification\|card identification\|security code\|card code"

150 "\|cvn\|cvv\|cvc\|csc\|cvd\|cid\|ccv"	155 "\|cvn\|cvv\|cvc\|csc\|cvd\|cid\|ccv"

151 "\|\\bcid\\b";	156 "\|" WORDBREAK "cid" WORDBREAK;

152	157

153 // "Expiration date" is the most common label here, but some pages have	158 // "Expiration date" is the most common label here, but some pages have

154 // "Expires", "exp. date" or "exp. month" and "exp. year". We also look	159 // "Expires", "exp. date" or "exp. month" and "exp. year". We also look

155 // for the field names ccmonth and ccyear, which appear on at least 4 of	160 // for the field names ccmonth and ccyear, which appear on at least 4 of

156 // our test pages.	161 // our test pages.

157	162

158 // On at least one page (The China Shop2.html) we find only the labels	163 // On at least one page (The China Shop2.html) we find only the labels

159 // "month" and "year". So for now we match these words directly; we'll	164 // "month" and "year". So for now we match these words directly; we'll

160 // see if this turns out to be too general.	165 // see if this turns out to be too general.

161	166

(...skipping 78 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
240 "\|^nome"; // pt-BR, pt-PT	245 "\|^nome"; // pt-BR, pt-PT

241 const char kFirstNameRe[] =	246 const char kFirstNameRe[] =

242 "first.name\|initials\|fname\|first$\|given.name"	247 "first.name\|initials\|fname\|first$\|given.name"

243 "\|vorname" // de-DE	248 "\|vorname" // de-DE

244 "\|nombre" // es	249 "\|nombre" // es

245 "\|forename\|prénom\|prenom" // fr-FR	250 "\|forename\|prénom\|prenom" // fr-FR

246 "\|名" // ja-JP	251 "\|名" // ja-JP

247 "\|nome" // pt-BR, pt-PT	252 "\|nome" // pt-BR, pt-PT

248 "\|Имя" // ru	253 "\|Имя" // ru

249 "\|이름"; // ko-KR	254 "\|이름"; // ko-KR

250 const char kMiddleInitialRe[] = "middle.*initial\|m\\.i\\.\|mi$\|\\bmi\\b";	255 const char kMiddleInitialRe[] =

	256 "middle.*initial\|m\\.i\\.\|mi$\|" WORDBREAK "mi" WORDBREAK;

251 const char kMiddleNameRe[] =	257 const char kMiddleNameRe[] =

252 "middle.*name\|mname\|middle$"	258 "middle.*name\|mname\|middle$"

253 "\|apellido.?materno\|lastlastname"; // es	259 "\|apellido.?materno\|lastlastname"; // es

254 const char kLastNameRe[] =	260 const char kLastNameRe[] =

255 "last.name\|lname\|surname\|last$\|secondname\|family.name"	261 "last.name\|lname\|surname\|last$\|secondname\|family.name"

256 "\|nachname" // de-DE	262 "\|nachname" // de-DE

257 "\|apellido" // es	263 "\|apellido" // es

258 "\|famille\|^nom" // fr-FR	264 "\|famille\|^nom" // fr-FR

259 "\|cognome" // it-IT	265 "\|cognome" // it-IT

260 "\|姓" // ja-JP	266 "\|姓" // ja-JP

(...skipping 25 matching lines...) Expand all Loading...
286 "^-$\|^\\)$";	292 "^-$\|^\\)$";

287 const char kPhoneSuffixSeparatorRe[] =	293 const char kPhoneSuffixSeparatorRe[] =

288 "^-$";	294 "^-$";

289 const char kPhonePrefixRe[] =	295 const char kPhonePrefixRe[] =

290 "prefix\|exchange"	296 "prefix\|exchange"

291 "\|preselection" // fr-FR	297 "\|preselection" // fr-FR

292 "\|ddd"; // pt-BR, pt-PT	298 "\|ddd"; // pt-BR, pt-PT

293 const char kPhoneSuffixRe[] =	299 const char kPhoneSuffixRe[] =

294 "suffix";	300 "suffix";

295 const char kPhoneExtensionRe[] =	301 const char kPhoneExtensionRe[] =

296 "\\bext\|ext\\b\|extension"	302 WORDBREAK "ext\|ext" WORDBREAK "\|extension"

297 "\|ramal"; // pt-BR, pt-PT	303 "\|ramal"; // pt-BR, pt-PT

298	304

299 } // namespace autofill	305 } // namespace autofill

	306

	307 #undef WORDBREAK

OLD	NEW