OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // This file contains UTF8 strings that we want as char arrays. To avoid | 5 // This file contains UTF8 strings that we want as char arrays. To avoid |
6 // differences between compilers, we use a script to convert the UTF8 | 6 // differences between compilers, we use a script to convert the UTF8 |
7 // strings into numeric literals (\x##). | 7 // strings into numeric literals (\x##). |
8 | 8 |
9 #include "components/autofill/core/browser/autofill_regex_constants.h" | 9 #include "components/autofill/core/browser/autofill_regex_constants.h" |
10 | 10 |
| 11 // This macro works around the fact that the RE2 library only supports ASCII |
| 12 // word boundaries. It is meant to behave the same as \b. |
| 13 #define WORDBREAK "(\\A|\\z|\\PL)" |
| 14 |
11 namespace autofill { | 15 namespace autofill { |
12 | 16 |
13 ///////////////////////////////////////////////////////////////////////////// | 17 ///////////////////////////////////////////////////////////////////////////// |
14 // address_field.cc | 18 // address_field.cc |
15 ///////////////////////////////////////////////////////////////////////////// | 19 ///////////////////////////////////////////////////////////////////////////// |
16 const char kAttentionIgnoredRe[] = "attention|attn"; | 20 const char kAttentionIgnoredRe[] = "attention|attn"; |
17 const char kRegionIgnoredRe[] = | 21 const char kRegionIgnoredRe[] = |
18 "province|region|other" | 22 "province|region|other" |
19 "|provincia" // es | 23 "|provincia" // es |
20 "|bairro|suburb"; // pt-BR, pt-PT | 24 "|bairro|suburb"; // pt-BR, pt-PT |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
77 "|país|pais" // es | 81 "|país|pais" // es |
78 "|国" // ja-JP | 82 "|国" // ja-JP |
79 "|国家" // zh-CN | 83 "|国家" // zh-CN |
80 "|국가|나라"; // ko-KR | 84 "|국가|나라"; // ko-KR |
81 const char kCountryLocationRe[] = | 85 const char kCountryLocationRe[] = |
82 "location"; | 86 "location"; |
83 const char kZipCodeRe[] = | 87 const char kZipCodeRe[] = |
84 "zip|postal|post.*code|pcode" | 88 "zip|postal|post.*code|pcode" |
85 "|pin.?code" // en-IN | 89 "|pin.?code" // en-IN |
86 "|postleitzahl" // de-DE | 90 "|postleitzahl" // de-DE |
87 "|\\bcp\\b" // es | 91 "|" WORDBREAK "cp" WORDBREAK // es |
88 "|\\bcdp\\b" // fr-FR | 92 "|" WORDBREAK "cdp" WORDBREAK // fr-FR |
89 "|\\bcap\\b" // it-IT | 93 "|" WORDBREAK "cap" WORDBREAK // it-IT |
90 "|郵便番号" // ja-JP | 94 "|郵便番号" // ja-JP |
91 "|codigo|codpos|\\bcep\\b" // pt-BR, pt-PT | 95 "|codigo|codpos|" WORDBREAK "cep" WORDBREAK // pt-BR, pt-PT |
92 "|Почтовый.?Индекс" // ru | 96 "|Почтовый.?Индекс" // ru |
93 "|邮政编码|邮编" // zh-CN | 97 "|邮政编码|邮编" // zh-CN |
94 "|郵遞區號" // zh-TW | 98 "|郵遞區號" // zh-TW |
95 "|우편.?번호"; // ko-KR | 99 "|우편.?번호"; // ko-KR |
96 const char kZip4Re[] = | 100 const char kZip4Re[] = |
97 "zip|^-$|post2" | 101 "zip|^-$|post2" |
98 "|codpos2"; // pt-BR, pt-PT | 102 "|codpos2"; // pt-BR, pt-PT |
99 const char kCityRe[] = | 103 const char kCityRe[] = |
100 "city|town" | 104 "city|town" |
101 "|\\bort\\b|stadt" // de-DE | 105 "|" WORDBREAK "ort" WORDBREAK "|stadt" // de-DE |
102 "|suburb" // en-AU | 106 "|suburb" // en-AU |
103 "|ciudad|provincia|localidad|poblacion" // es | 107 "|ciudad|provincia|localidad|poblacion" // es |
104 "|ville|commune" // fr-FR | 108 "|ville|commune" // fr-FR |
105 "|localita" // it-IT | 109 "|localita" // it-IT |
106 "|市区町村" // ja-JP | 110 "|市区町村" // ja-JP |
107 "|cidade" // pt-BR, pt-PT | 111 "|cidade" // pt-BR, pt-PT |
108 "|Город" // ru | 112 "|Город" // ru |
109 "|市" // zh-CN | 113 "|市" // zh-CN |
110 "|分區" // zh-TW | 114 "|分區" // zh-TW |
111 "|^시[^도·・]|시[·・]?군[·・]?구"; // ko-KR | 115 "|^시[^도·・]|시[·・]?군[·・]?구"; // ko-KR |
112 const char kStateRe[] = | 116 const char kStateRe[] = |
113 "(?<!united )state|county|region|province" | 117 "state|county|region|province" |
114 "|land" // de-DE | 118 "|land" // de-DE |
115 "|county|principality" // en-UK | 119 "|county|principality" // en-UK |
116 "|都道府県" // ja-JP | 120 "|都道府県" // ja-JP |
117 "|estado|provincia" // pt-BR, pt-PT | 121 "|estado|provincia" // pt-BR, pt-PT |
118 "|область" // ru | 122 "|область" // ru |
119 "|省" // zh-CN | 123 "|省" // zh-CN |
120 "|地區" // zh-TW | 124 "|地區" // zh-TW |
121 "|^시[·・]?도"; // ko-KR | 125 "|^시[·・]?도"; // ko-KR |
122 | 126 |
123 ///////////////////////////////////////////////////////////////////////////// | 127 ///////////////////////////////////////////////////////////////////////////// |
124 // credit_card_field.cc | 128 // credit_card_field.cc |
125 ///////////////////////////////////////////////////////////////////////////// | 129 ///////////////////////////////////////////////////////////////////////////// |
126 const char kNameOnCardRe[] = | 130 const char kNameOnCardRe[] = |
127 "card.?(holder|owner)|name.*\\bon\\b.*card|(card|cc).?name|cc.?full.?name" | 131 "card.?(holder|owner)|name.*" WORDBREAK "on" WORDBREAK ".*card" |
| 132 "|(card|cc).?name|cc.?full.?name" |
128 "|karteninhaber" // de-DE | 133 "|karteninhaber" // de-DE |
129 "|nombre.*tarjeta" // es | 134 "|nombre.*tarjeta" // es |
130 "|nom.*carte" // fr-FR | 135 "|nom.*carte" // fr-FR |
131 "|nome.*cart" // it-IT | 136 "|nome.*cart" // it-IT |
132 "|名前" // ja-JP | 137 "|名前" // ja-JP |
133 "|Имя.*карты" // ru | 138 "|Имя.*карты" // ru |
134 "|信用卡开户名|开户名|持卡人姓名" // zh-CN | 139 "|信用卡开户名|开户名|持卡人姓名" // zh-CN |
135 "|持卡人姓名"; // zh-TW | 140 "|持卡人姓名"; // zh-TW |
136 const char kNameOnCardContextualRe[] = | 141 const char kNameOnCardContextualRe[] = |
137 "name"; | 142 "name"; |
138 const char kCardNumberRe[] = | 143 const char kCardNumberRe[] = |
139 "(card|cc|acct).?(number|#|no|num)" | 144 "(card|cc|acct).?(number|#|no|num)" |
140 "|nummer" // de-DE | 145 "|nummer" // de-DE |
141 "|credito|numero|número" // es | 146 "|credito|numero|número" // es |
142 "|numéro" // fr-FR | 147 "|numéro" // fr-FR |
143 "|カード番号" // ja-JP | 148 "|カード番号" // ja-JP |
144 "|Номер.*карты" // ru | 149 "|Номер.*карты" // ru |
145 "|信用卡号|信用卡号码" // zh-CN | 150 "|信用卡号|信用卡号码" // zh-CN |
146 "|信用卡卡號" // zh-TW | 151 "|信用卡卡號" // zh-TW |
147 "|카드"; // ko-KR | 152 "|카드"; // ko-KR |
148 const char kCardCvcRe[] = | 153 const char kCardCvcRe[] = |
149 "verification|card identification|security code|card code" | 154 "verification|card identification|security code|card code" |
150 "|cvn|cvv|cvc|csc|cvd|cid|ccv" | 155 "|cvn|cvv|cvc|csc|cvd|cid|ccv" |
151 "|\\bcid\\b"; | 156 "|" WORDBREAK "cid" WORDBREAK; |
152 | 157 |
153 // "Expiration date" is the most common label here, but some pages have | 158 // "Expiration date" is the most common label here, but some pages have |
154 // "Expires", "exp. date" or "exp. month" and "exp. year". We also look | 159 // "Expires", "exp. date" or "exp. month" and "exp. year". We also look |
155 // for the field names ccmonth and ccyear, which appear on at least 4 of | 160 // for the field names ccmonth and ccyear, which appear on at least 4 of |
156 // our test pages. | 161 // our test pages. |
157 | 162 |
158 // On at least one page (The China Shop2.html) we find only the labels | 163 // On at least one page (The China Shop2.html) we find only the labels |
159 // "month" and "year". So for now we match these words directly; we'll | 164 // "month" and "year". So for now we match these words directly; we'll |
160 // see if this turns out to be too general. | 165 // see if this turns out to be too general. |
161 | 166 |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
240 "|^nome"; // pt-BR, pt-PT | 245 "|^nome"; // pt-BR, pt-PT |
241 const char kFirstNameRe[] = | 246 const char kFirstNameRe[] = |
242 "first.*name|initials|fname|first$|given.*name" | 247 "first.*name|initials|fname|first$|given.*name" |
243 "|vorname" // de-DE | 248 "|vorname" // de-DE |
244 "|nombre" // es | 249 "|nombre" // es |
245 "|forename|prénom|prenom" // fr-FR | 250 "|forename|prénom|prenom" // fr-FR |
246 "|名" // ja-JP | 251 "|名" // ja-JP |
247 "|nome" // pt-BR, pt-PT | 252 "|nome" // pt-BR, pt-PT |
248 "|Имя" // ru | 253 "|Имя" // ru |
249 "|이름"; // ko-KR | 254 "|이름"; // ko-KR |
250 const char kMiddleInitialRe[] = "middle.*initial|m\\.i\\.|mi$|\\bmi\\b"; | 255 const char kMiddleInitialRe[] = |
| 256 "middle.*initial|m\\.i\\.|mi$|" WORDBREAK "mi" WORDBREAK; |
251 const char kMiddleNameRe[] = | 257 const char kMiddleNameRe[] = |
252 "middle.*name|mname|middle$" | 258 "middle.*name|mname|middle$" |
253 "|apellido.?materno|lastlastname"; // es | 259 "|apellido.?materno|lastlastname"; // es |
254 const char kLastNameRe[] = | 260 const char kLastNameRe[] = |
255 "last.*name|lname|surname|last$|secondname|family.*name" | 261 "last.*name|lname|surname|last$|secondname|family.*name" |
256 "|nachname" // de-DE | 262 "|nachname" // de-DE |
257 "|apellido" // es | 263 "|apellido" // es |
258 "|famille|^nom" // fr-FR | 264 "|famille|^nom" // fr-FR |
259 "|cognome" // it-IT | 265 "|cognome" // it-IT |
260 "|姓" // ja-JP | 266 "|姓" // ja-JP |
(...skipping 25 matching lines...) Expand all Loading... |
286 "^-$|^\\)$"; | 292 "^-$|^\\)$"; |
287 const char kPhoneSuffixSeparatorRe[] = | 293 const char kPhoneSuffixSeparatorRe[] = |
288 "^-$"; | 294 "^-$"; |
289 const char kPhonePrefixRe[] = | 295 const char kPhonePrefixRe[] = |
290 "prefix|exchange" | 296 "prefix|exchange" |
291 "|preselection" // fr-FR | 297 "|preselection" // fr-FR |
292 "|ddd"; // pt-BR, pt-PT | 298 "|ddd"; // pt-BR, pt-PT |
293 const char kPhoneSuffixRe[] = | 299 const char kPhoneSuffixRe[] = |
294 "suffix"; | 300 "suffix"; |
295 const char kPhoneExtensionRe[] = | 301 const char kPhoneExtensionRe[] = |
296 "\\bext|ext\\b|extension" | 302 WORDBREAK "ext|ext" WORDBREAK "|extension" |
297 "|ramal"; // pt-BR, pt-PT | 303 "|ramal"; // pt-BR, pt-PT |
298 | 304 |
299 } // namespace autofill | 305 } // namespace autofill |
| 306 |
| 307 #undef WORDBREAK |
OLD | NEW |