OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // This file contains UTF8 strings that we want as char arrays. To avoid | 5 // This file contains UTF8 strings that we want as char arrays. To avoid |
6 // different compilers, we use a script to convert the UTF8 strings into | 6 // different compilers, we use a script to convert the UTF8 strings into |
7 // numeric literals (\x##). | 7 // numeric literals (\x##). |
8 | 8 |
9 #include "components/autofill/core/browser/autofill_regex_constants.h" | 9 #include "components/autofill/core/browser/autofill_regex_constants.h" |
10 | 10 |
11 // This macro is to workaround the fact that RE2 library only supports ASCII | |
12 // word boundaries and it is supposed to be the same as \b. | |
13 #define WORDBREAK "(\\A|\\z|\\PL)" | |
14 | |
15 namespace autofill { | 11 namespace autofill { |
16 | 12 |
17 ///////////////////////////////////////////////////////////////////////////// | 13 ///////////////////////////////////////////////////////////////////////////// |
18 // address_field.cc | 14 // address_field.cc |
19 ///////////////////////////////////////////////////////////////////////////// | 15 ///////////////////////////////////////////////////////////////////////////// |
20 const char kAttentionIgnoredRe[] = "attention|attn"; | 16 const char kAttentionIgnoredRe[] = "attention|attn"; |
21 const char kRegionIgnoredRe[] = | 17 const char kRegionIgnoredRe[] = |
22 "province|region|other" | 18 "province|region|other" |
23 "|provincia" // es | 19 "|provincia" // es |
24 "|bairro|suburb"; // pt-BR, pt-PT | 20 "|bairro|suburb"; // pt-BR, pt-PT |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
81 "|país|pais" // es | 77 "|país|pais" // es |
82 "|国" // ja-JP | 78 "|国" // ja-JP |
83 "|国家" // zh-CN | 79 "|国家" // zh-CN |
84 "|국가|나라"; // ko-KR | 80 "|국가|나라"; // ko-KR |
85 const char kCountryLocationRe[] = | 81 const char kCountryLocationRe[] = |
86 "location"; | 82 "location"; |
87 const char kZipCodeRe[] = | 83 const char kZipCodeRe[] = |
88 "zip|postal|post.*code|pcode" | 84 "zip|postal|post.*code|pcode" |
89 "|pin.?code" // en-IN | 85 "|pin.?code" // en-IN |
90 "|postleitzahl" // de-DE | 86 "|postleitzahl" // de-DE |
91 "|" WORDBREAK "cp" WORDBREAK // es | 87 "|\\bcp\\b" // es |
92 "|" WORDBREAK "cdp" WORDBREAK // fr-FR | 88 "|\\bcdp\\b" // fr-FR |
93 "|" WORDBREAK "cap" WORDBREAK // it-IT | 89 "|\\bcap\\b" // it-IT |
94 "|郵便番号" // ja-JP | 90 "|郵便番号" // ja-JP |
95 "|codigo|codpos|" WORDBREAK "cep" WORDBREAK // pt-BR, pt-PT | 91 "|codigo|codpos|\\bcep\\b" // pt-BR, pt-PT |
96 "|Почтовый.?Индекс" // ru | 92 "|Почтовый.?Индекс" // ru |
97 "|邮政编码|邮编" // zh-CN | 93 "|邮政编码|邮编" // zh-CN |
98 "|郵遞區號" // zh-TW | 94 "|郵遞區號" // zh-TW |
99 "|우편.?번호"; // ko-KR | 95 "|우편.?번호"; // ko-KR |
100 const char kZip4Re[] = | 96 const char kZip4Re[] = |
101 "zip|^-$|post2" | 97 "zip|^-$|post2" |
102 "|codpos2"; // pt-BR, pt-PT | 98 "|codpos2"; // pt-BR, pt-PT |
103 const char kCityRe[] = | 99 const char kCityRe[] = |
104 "city|town" | 100 "city|town" |
105 "|" WORDBREAK "ort" WORDBREAK "|stadt" // de-DE | 101 "|\\bort\\b|stadt" // de-DE |
106 "|suburb" // en-AU | 102 "|suburb" // en-AU |
107 "|ciudad|provincia|localidad|poblacion" // es | 103 "|ciudad|provincia|localidad|poblacion" // es |
108 "|ville|commune" // fr-FR | 104 "|ville|commune" // fr-FR |
109 "|localita" // it-IT | 105 "|localita" // it-IT |
110 "|市区町村" // ja-JP | 106 "|市区町村" // ja-JP |
111 "|cidade" // pt-BR, pt-PT | 107 "|cidade" // pt-BR, pt-PT |
112 "|Город" // ru | 108 "|Город" // ru |
113 "|市" // zh-CN | 109 "|市" // zh-CN |
114 "|分區" // zh-TW | 110 "|分區" // zh-TW |
115 "|^시[^도·・]|시[·・]?군[·・]?구"; // ko-KR | 111 "|^시[^도·・]|시[·・]?군[·・]?구"; // ko-KR |
116 const char kStateRe[] = | 112 const char kStateRe[] = |
117 "state|county|region|province" | 113 "(?<!united )state|county|region|province" |
118 "|land" // de-DE | 114 "|land" // de-DE |
119 "|county|principality" // en-UK | 115 "|county|principality" // en-UK |
120 "|都道府県" // ja-JP | 116 "|都道府県" // ja-JP |
121 "|estado|provincia" // pt-BR, pt-PT | 117 "|estado|provincia" // pt-BR, pt-PT |
122 "|область" // ru | 118 "|область" // ru |
123 "|省" // zh-CN | 119 "|省" // zh-CN |
124 "|地區" // zh-TW | 120 "|地區" // zh-TW |
125 "|^시[·・]?도"; // ko-KR | 121 "|^시[·・]?도"; // ko-KR |
126 | 122 |
127 ///////////////////////////////////////////////////////////////////////////// | 123 ///////////////////////////////////////////////////////////////////////////// |
128 // credit_card_field.cc | 124 // credit_card_field.cc |
129 ///////////////////////////////////////////////////////////////////////////// | 125 ///////////////////////////////////////////////////////////////////////////// |
130 const char kNameOnCardRe[] = | 126 const char kNameOnCardRe[] = |
131 "card.?(holder|owner)|name.*" WORDBREAK "on" WORDBREAK ".*card" | 127 "card.?(holder|owner)|name.*\\bon\\b.*card|(card|cc).?name|cc.?full.?name" |
132 "|(card|cc).?name|cc.?full.?name" | |
133 "|karteninhaber" // de-DE | 128 "|karteninhaber" // de-DE |
134 "|nombre.*tarjeta" // es | 129 "|nombre.*tarjeta" // es |
135 "|nom.*carte" // fr-FR | 130 "|nom.*carte" // fr-FR |
136 "|nome.*cart" // it-IT | 131 "|nome.*cart" // it-IT |
137 "|名前" // ja-JP | 132 "|名前" // ja-JP |
138 "|Имя.*карты" // ru | 133 "|Имя.*карты" // ru |
139 "|信用卡开户名|开户名|持卡人姓名" // zh-CN | 134 "|信用卡开户名|开户名|持卡人姓名" // zh-CN |
140 "|持卡人姓名"; // zh-TW | 135 "|持卡人姓名"; // zh-TW |
141 const char kNameOnCardContextualRe[] = | 136 const char kNameOnCardContextualRe[] = |
142 "name"; | 137 "name"; |
143 const char kCardNumberRe[] = | 138 const char kCardNumberRe[] = |
144 "(card|cc|acct).?(number|#|no|num)" | 139 "(card|cc|acct).?(number|#|no|num)" |
145 "|nummer" // de-DE | 140 "|nummer" // de-DE |
146 "|credito|numero|número" // es | 141 "|credito|numero|número" // es |
147 "|numéro" // fr-FR | 142 "|numéro" // fr-FR |
148 "|カード番号" // ja-JP | 143 "|カード番号" // ja-JP |
149 "|Номер.*карты" // ru | 144 "|Номер.*карты" // ru |
150 "|信用卡号|信用卡号码" // zh-CN | 145 "|信用卡号|信用卡号码" // zh-CN |
151 "|信用卡卡號" // zh-TW | 146 "|信用卡卡號" // zh-TW |
152 "|카드"; // ko-KR | 147 "|카드"; // ko-KR |
153 const char kCardCvcRe[] = | 148 const char kCardCvcRe[] = |
154 "verification|card identification|security code|card code" | 149 "verification|card identification|security code|card code" |
155 "|cvn|cvv|cvc|csc|cvd|cid|ccv" | 150 "|cvn|cvv|cvc|csc|cvd|cid|ccv" |
156 "|" WORDBREAK "cid" WORDBREAK; | 151 "|\\bcid\\b"; |
157 | 152 |
158 // "Expiration date" is the most common label here, but some pages have | 153 // "Expiration date" is the most common label here, but some pages have |
159 // "Expires", "exp. date" or "exp. month" and "exp. year". We also look | 154 // "Expires", "exp. date" or "exp. month" and "exp. year". We also look |
160 // for the field names ccmonth and ccyear, which appear on at least 4 of | 155 // for the field names ccmonth and ccyear, which appear on at least 4 of |
161 // our test pages. | 156 // our test pages. |
162 | 157 |
163 // On at least one page (The China Shop2.html) we find only the labels | 158 // On at least one page (The China Shop2.html) we find only the labels |
164 // "month" and "year". So for now we match these words directly; we'll | 159 // "month" and "year". So for now we match these words directly; we'll |
165 // see if this turns out to be too general. | 160 // see if this turns out to be too general. |
166 | 161 |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
245 "|^nome"; // pt-BR, pt-PT | 240 "|^nome"; // pt-BR, pt-PT |
246 const char kFirstNameRe[] = | 241 const char kFirstNameRe[] = |
247 "first.*name|initials|fname|first$|given.*name" | 242 "first.*name|initials|fname|first$|given.*name" |
248 "|vorname" // de-DE | 243 "|vorname" // de-DE |
249 "|nombre" // es | 244 "|nombre" // es |
250 "|forename|prénom|prenom" // fr-FR | 245 "|forename|prénom|prenom" // fr-FR |
251 "|名" // ja-JP | 246 "|名" // ja-JP |
252 "|nome" // pt-BR, pt-PT | 247 "|nome" // pt-BR, pt-PT |
253 "|Имя" // ru | 248 "|Имя" // ru |
254 "|이름"; // ko-KR | 249 "|이름"; // ko-KR |
255 const char kMiddleInitialRe[] = | 250 const char kMiddleInitialRe[] = "middle.*initial|m\\.i\\.|mi$|\\bmi\\b"; |
256 "middle.*initial|m\\.i\\.|mi$|" WORDBREAK "mi" WORDBREAK; | |
257 const char kMiddleNameRe[] = | 251 const char kMiddleNameRe[] = |
258 "middle.*name|mname|middle$" | 252 "middle.*name|mname|middle$" |
259 "|apellido.?materno|lastlastname"; // es | 253 "|apellido.?materno|lastlastname"; // es |
260 const char kLastNameRe[] = | 254 const char kLastNameRe[] = |
261 "last.*name|lname|surname|last$|secondname|family.*name" | 255 "last.*name|lname|surname|last$|secondname|family.*name" |
262 "|nachname" // de-DE | 256 "|nachname" // de-DE |
263 "|apellido" // es | 257 "|apellido" // es |
264 "|famille|^nom" // fr-FR | 258 "|famille|^nom" // fr-FR |
265 "|cognome" // it-IT | 259 "|cognome" // it-IT |
266 "|姓" // ja-JP | 260 "|姓" // ja-JP |
(...skipping 25 matching lines...) Expand all Loading... |
292 "^-$|^\\)$"; | 286 "^-$|^\\)$"; |
293 const char kPhoneSuffixSeparatorRe[] = | 287 const char kPhoneSuffixSeparatorRe[] = |
294 "^-$"; | 288 "^-$"; |
295 const char kPhonePrefixRe[] = | 289 const char kPhonePrefixRe[] = |
296 "prefix|exchange" | 290 "prefix|exchange" |
297 "|preselection" // fr-FR | 291 "|preselection" // fr-FR |
298 "|ddd"; // pt-BR, pt-PT | 292 "|ddd"; // pt-BR, pt-PT |
299 const char kPhoneSuffixRe[] = | 293 const char kPhoneSuffixRe[] = |
300 "suffix"; | 294 "suffix"; |
301 const char kPhoneExtensionRe[] = | 295 const char kPhoneExtensionRe[] = |
302 WORDBREAK "ext|ext" WORDBREAK "|extension" | 296 "\\bext|ext\\b|extension" |
303 "|ramal"; // pt-BR, pt-PT | 297 "|ramal"; // pt-BR, pt-PT |
304 | 298 |
305 } // namespace autofill | 299 } // namespace autofill |
306 | |
307 #undef WORDBREAK | |
OLD | NEW |