OLD | NEW |
| (Empty) |
1 ######################## BEGIN LICENSE BLOCK ######################## | |
2 # The Original Code is mozilla.org code. | |
3 # | |
4 # The Initial Developer of the Original Code is | |
5 # Netscape Communications Corporation. | |
6 # Portions created by the Initial Developer are Copyright (C) 1998 | |
7 # the Initial Developer. All Rights Reserved. | |
8 # | |
9 # Contributor(s): | |
10 # Mark Pilgrim - port to Python | |
11 # | |
12 # This library is free software; you can redistribute it and/or | |
13 # modify it under the terms of the GNU Lesser General Public | |
14 # License as published by the Free Software Foundation; either | |
15 # version 2.1 of the License, or (at your option) any later version. | |
16 # | |
17 # This library is distributed in the hope that it will be useful, | |
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
20 # Lesser General Public License for more details. | |
21 # | |
22 # You should have received a copy of the GNU Lesser General Public | |
23 # License along with this library; if not, write to the Free Software | |
24 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA | |
25 # 02110-1301 USA | |
26 ######################### END LICENSE BLOCK ######################### | |
27 | |
28 from .constants import eStart, eError, eItsMe | |
29 | |
30 # BIG5 | |
31 | |
# Byte-class table for Big5: entry i is the class assigned to byte value i.
# Sixteen byte values per row; the trailing comment gives the row's range.
BIG5_cls = (
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 0, 0,  # 00 - 0f (0x00 is allowed as a legal value)
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 1, 1, 1, 1,  # 10 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 1,  # 70 - 7f
    4, 4, 4, 4, 4, 4, 4, 4,  4, 4, 4, 4, 4, 4, 4, 4,  # 80 - 8f
    4, 4, 4, 4, 4, 4, 4, 4,  4, 4, 4, 4, 4, 4, 4, 4,  # 90 - 9f
    4, 3, 3, 3, 3, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,  # a0 - af
    3, 3, 3, 3, 3, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,  # b0 - bf
    3, 3, 3, 3, 3, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,  # c0 - cf
    3, 3, 3, 3, 3, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,  # d0 - df
    3, 3, 3, 3, 3, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,  # e0 - ef
    3, 3, 3, 3, 3, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 0,  # f0 - ff
)
66 | |
# State-transition table for Big5, written one row per current state and one
# column per byte class (5 classes).  Same flat sequence of 24 entries as
# before, only re-wrapped.
# NOTE(review): the row/column reading follows the layout documented on
# CP949_st (rows = previous state, columns = class) -- confirm against the
# state-machine code that consumes this table.
BIG5_st = (
    # cls=0  1       2       3       4
    eError, eStart, eStart, 3,      eError,  # eStart
    eError, eError, eError, eError, eError,  # eError
    eItsMe, eItsMe, eItsMe, eItsMe, eItsMe,  # eItsMe
    eError, eError, eStart, eStart, eStart,  # 3
    # Trailing entries beyond the last full row (kept to preserve length 24).
    eStart, eStart, eStart, eStart,
)
72 | |
# One entry per Big5 byte class (0-4); presumably the character length in
# bytes associated with a byte of that class -- confirm against the consumer.
Big5CharLenTable = (0, 1, 1, 2, 0)

# Bundle of the Big5 tables plus the encoding name.
Big5SMModel = {
    'classTable': BIG5_cls,
    'classFactor': 5,  # classes 0-4 appear in BIG5_cls
    'stateTable': BIG5_st,
    'charLenTable': Big5CharLenTable,
    'name': 'Big5',
}
80 | |
81 # CP949 | |
82 | |
# Byte-class table for CP949: entry i is the class assigned to byte value i.
# Eight byte values per row; the trailing comment gives the row's range.
CP949_cls = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    1, 4, 4, 4, 4, 4, 4, 4,  # 40 - 47
    4, 4, 4, 4, 4, 4, 4, 4,  # 48 - 4f
    4, 4, 5, 5, 5, 5, 5, 5,  # 50 - 57
    5, 5, 5, 1, 1, 1, 1, 1,  # 58 - 5f
    1, 5, 5, 5, 5, 5, 5, 5,  # 60 - 67
    5, 5, 5, 5, 5, 5, 5, 5,  # 68 - 6f
    5, 5, 5, 5, 5, 5, 5, 5,  # 70 - 77
    5, 5, 5, 1, 1, 1, 1, 1,  # 78 - 7f
    0, 6, 6, 6, 6, 6, 6, 6,  # 80 - 87
    6, 6, 6, 6, 6, 6, 6, 6,  # 88 - 8f
    6, 6, 6, 6, 6, 6, 6, 6,  # 90 - 97
    6, 6, 6, 6, 6, 6, 6, 6,  # 98 - 9f
    6, 7, 7, 7, 7, 7, 7, 7,  # a0 - a7
    7, 7, 7, 7, 7, 8, 8, 8,  # a8 - af
    7, 7, 7, 7, 7, 7, 7, 7,  # b0 - b7
    7, 7, 7, 7, 7, 7, 7, 7,  # b8 - bf
    7, 7, 7, 7, 7, 7, 9, 2,  # c0 - c7
    2, 3, 2, 2, 2, 2, 2, 2,  # c8 - cf
    2, 2, 2, 2, 2, 2, 2, 2,  # d0 - d7
    2, 2, 2, 2, 2, 2, 2, 2,  # d8 - df
    2, 2, 2, 2, 2, 2, 2, 2,  # e0 - e7
    2, 2, 2, 2, 2, 2, 2, 2,  # e8 - ef
    2, 2, 2, 2, 2, 2, 2, 2,  # f0 - f7
    2, 2, 2, 2, 2, 2, 2, 0,  # f8 - ff
)
101 | |
# State-transition table for CP949.  Each row is labelled with the previous
# state and holds one entry per byte class (10 classes, columns 0-9).
CP949_st = (
    # cls = 0    1       2       3       4       5       6       7       8       9       # previous state =
    eError, eStart, 3,      eError, eStart, eStart, 4,      5,      eError, 6,       # eStart
    eError, eError, eError, eError, eError, eError, eError, eError, eError, eError,  # eError
    eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe,  # eItsMe
    eError, eError, eStart, eStart, eError, eError, eError, eStart, eStart, eStart,  # 3
    eError, eError, eStart, eStart, eStart, eStart, eStart, eStart, eStart, eStart,  # 4
    eError, eStart, eStart, eStart, eStart, eStart, eStart, eStart, eStart, eStart,  # 5
    eError, eStart, eStart, eStart, eStart, eError, eError, eStart, eStart, eStart,  # 6
)
112 | |
# One entry per CP949 byte class (0-9); presumably the character length in
# bytes associated with a byte of that class -- confirm against the consumer.
CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)

# Bundle of the CP949 tables plus the encoding name.
CP949SMModel = {
    'classTable': CP949_cls,
    'classFactor': 10,  # classes 0-9 appear in CP949_cls
    'stateTable': CP949_st,
    'charLenTable': CP949CharLenTable,
    'name': 'CP949',
}
120 | |
121 # EUC-JP | |
122 | |
# Byte-class table for EUC-JP: entry i is the class assigned to byte value i.
# Sixteen byte values per row; the trailing comment gives the row's range.
EUCJP_cls = (
    4, 4, 4, 4, 4, 4, 4, 4,  4, 4, 4, 4, 4, 4, 5, 5,  # 00 - 0f
    4, 4, 4, 4, 4, 4, 4, 4,  4, 4, 4, 5, 4, 4, 4, 4,  # 10 - 1f
    4, 4, 4, 4, 4, 4, 4, 4,  4, 4, 4, 4, 4, 4, 4, 4,  # 20 - 2f
    4, 4, 4, 4, 4, 4, 4, 4,  4, 4, 4, 4, 4, 4, 4, 4,  # 30 - 3f
    4, 4, 4, 4, 4, 4, 4, 4,  4, 4, 4, 4, 4, 4, 4, 4,  # 40 - 4f
    4, 4, 4, 4, 4, 4, 4, 4,  4, 4, 4, 4, 4, 4, 4, 4,  # 50 - 5f
    4, 4, 4, 4, 4, 4, 4, 4,  4, 4, 4, 4, 4, 4, 4, 4,  # 60 - 6f
    4, 4, 4, 4, 4, 4, 4, 4,  4, 4, 4, 4, 4, 4, 4, 4,  # 70 - 7f
    5, 5, 5, 5, 5, 5, 5, 5,  5, 5, 5, 5, 5, 5, 1, 3,  # 80 - 8f
    5, 5, 5, 5, 5, 5, 5, 5,  5, 5, 5, 5, 5, 5, 5, 5,  # 90 - 9f
    5, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # a0 - af
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # b0 - bf
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # c0 - cf
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # d0 - df
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # e0 - ef
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 5,  # f0 - ff
)
157 | |
# State-transition table for EUC-JP, written one row per current state and
# one column per byte class (6 classes).  Same flat sequence of 40 entries as
# before, only re-wrapped.
# NOTE(review): the row/column reading follows the layout documented on
# CP949_st (rows = previous state, columns = class) -- confirm against the
# state-machine code that consumes this table.
EUCJP_st = (
    # cls=0  1       2       3       4       5
    3,      4,      3,      5,      eStart, eError,  # eStart
    eError, eError, eError, eError, eError, eError,  # eError
    eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe,  # eItsMe
    eStart, eError, eStart, eError, eError, eError,  # 3
    eError, eError, eStart, eError, eError, eError,  # 4
    3,      eError, 3,      eError, eError, eError,  # 5
    # Trailing entries beyond the last full row (kept to preserve length 40).
    eStart, eStart, eStart, eStart,
)
165 | |
# One entry per EUC-JP byte class (0-5); presumably the character length in
# bytes associated with a byte of that class -- confirm against the consumer.
EUCJPCharLenTable = (2, 2, 2, 3, 1, 0)

# Bundle of the EUC-JP tables plus the encoding name.
EUCJPSMModel = {
    'classTable': EUCJP_cls,
    'classFactor': 6,  # classes 0-5 appear in EUCJP_cls
    'stateTable': EUCJP_st,
    'charLenTable': EUCJPCharLenTable,
    'name': 'EUC-JP',
}
173 | |
174 # EUC-KR | |
175 | |
# Byte-class table for EUC-KR: entry i is the class assigned to byte value i.
# Sixteen byte values per row; the trailing comment gives the row's range.
EUCKR_cls = (
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 0, 0,  # 00 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 1, 1, 1, 1,  # 10 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 3f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 40 - 4f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 50 - 5f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 60 - 6f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 70 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 9f
    0, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 3, 3, 3,  # a0 - af
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # b0 - bf
    2, 2, 2, 2, 2, 2, 2, 2,  2, 3, 2, 2, 2, 2, 2, 2,  # c0 - cf
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # d0 - df
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # e0 - ef
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 0,  # f0 - ff
)
210 | |
# State-transition table for EUC-KR, written one row per current state and
# one column per byte class (4 classes).  Same flat sequence of 16 entries as
# before, only re-wrapped.
# NOTE(review): the row/column reading follows the layout documented on
# CP949_st (rows = previous state, columns = class) -- confirm against the
# state-machine code that consumes this table.
EUCKR_st = (
    # cls=0  1       2       3
    eError, eStart, 3,      eError,  # eStart
    eError, eError, eError, eError,  # eError
    eItsMe, eItsMe, eItsMe, eItsMe,  # eItsMe
    eError, eError, eStart, eStart,  # 3
)
215 | |
# One entry per EUC-KR byte class (0-3); presumably the character length in
# bytes associated with a byte of that class -- confirm against the consumer.
EUCKRCharLenTable = (0, 1, 2, 0)

# Bundle of the EUC-KR tables plus the encoding name.
EUCKRSMModel = {
    'classTable': EUCKR_cls,
    'classFactor': 4,  # classes 0-3 appear in EUCKR_cls
    'stateTable': EUCKR_st,
    'charLenTable': EUCKRCharLenTable,
    'name': 'EUC-KR',
}
223 | |
224 # EUC-TW | |
225 | |
# Byte-class table for EUC-TW: entry i is the class assigned to byte value i.
# Sixteen byte values per row; the trailing comment gives the row's range.
EUCTW_cls = (
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 0, 0,  # 00 - 0f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 0, 2, 2, 2, 2,  # 10 - 1f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # 20 - 2f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # 30 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 6, 0,  # 80 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 9f
    0, 3, 4, 4, 4, 4, 4, 4,  5, 5, 1, 1, 1, 1, 1, 1,  # a0 - af
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # b0 - bf
    1, 1, 3, 1, 3, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,  # c0 - cf
    3, 3, 3, 3, 3, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,  # d0 - df
    3, 3, 3, 3, 3, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,  # e0 - ef
    3, 3, 3, 3, 3, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 0,  # f0 - ff
)
260 | |
# State-transition table for EUC-TW, written one row per current state and
# one column per byte class (7 classes).  Same flat sequence of 48 entries as
# before, only re-wrapped.
# NOTE(review): the row/column reading follows the layout documented on
# CP949_st (rows = previous state, columns = class) -- confirm against the
# state-machine code that consumes this table.
EUCTW_st = (
    # cls=0  1       2       3       4       5       6
    eError, eError, eStart, 3,      3,      3,      4,       # eStart
    eError, eError, eError, eError, eError, eError, eError,  # eError
    eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe,  # eItsMe
    eError, eStart, eError, eStart, eStart, eStart, eError,  # 3
    eError, eError, eError, eError, 5,      eError, eError,  # 4
    eError, eStart, eError, eStart, eStart, eStart, eError,  # 5
    # Trailing entries beyond the last full row (kept to preserve length 48).
    eStart, eStart, eStart, eStart, eStart, eStart,
)
269 | |
# One entry per EUC-TW byte class (0-6); presumably the character length in
# bytes associated with a byte of that class -- confirm against the consumer.
EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3)

# Bundle of the EUC-TW tables plus the encoding name.
EUCTWSMModel = {
    'classTable': EUCTW_cls,
    'classFactor': 7,  # classes 0-6 appear in EUCTW_cls
    'stateTable': EUCTW_st,
    'charLenTable': EUCTWCharLenTable,
    'name': 'x-euc-tw',
}
277 | |
278 # GB2312 | |
279 | |
# Byte-class table for GB2312: entry i is the class assigned to byte value i.
# Sixteen byte values per row; the trailing comment gives the row's range.
GB2312_cls = (
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 0, 0,  # 00 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 1, 1, 1, 1,  # 10 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 2f
    3, 3, 3, 3, 3, 3, 3, 3,  3, 3, 1, 1, 1, 1, 1, 1,  # 30 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 4,  # 70 - 7f
    5, 6, 6, 6, 6, 6, 6, 6,  6, 6, 6, 6, 6, 6, 6, 6,  # 80 - 8f
    6, 6, 6, 6, 6, 6, 6, 6,  6, 6, 6, 6, 6, 6, 6, 6,  # 90 - 9f
    6, 6, 6, 6, 6, 6, 6, 6,  6, 6, 6, 6, 6, 6, 6, 6,  # a0 - af
    6, 6, 6, 6, 6, 6, 6, 6,  6, 6, 6, 6, 6, 6, 6, 6,  # b0 - bf
    6, 6, 6, 6, 6, 6, 6, 6,  6, 6, 6, 6, 6, 6, 6, 6,  # c0 - cf
    6, 6, 6, 6, 6, 6, 6, 6,  6, 6, 6, 6, 6, 6, 6, 6,  # d0 - df
    6, 6, 6, 6, 6, 6, 6, 6,  6, 6, 6, 6, 6, 6, 6, 6,  # e0 - ef
    6, 6, 6, 6, 6, 6, 6, 6,  6, 6, 6, 6, 6, 6, 6, 0,  # f0 - ff
)
314 | |
# State-transition table for GB2312, written one row per current state and
# one column per byte class (7 classes).  Same flat sequence of 48 entries as
# before, only re-wrapped.
# NOTE(review): the row/column reading follows the layout documented on
# CP949_st (rows = previous state, columns = class) -- confirm against the
# state-machine code that consumes this table.
GB2312_st = (
    # cls=0  1       2       3       4       5       6
    eError, eStart, eStart, eStart, eStart, eStart, 3,       # eStart
    eError, eError, eError, eError, eError, eError, eError,  # eError
    eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe,  # eItsMe
    eError, eError, eStart, 4,      eError, eStart, eStart,  # 3
    eError, eError, eError, eError, eError, eError, 5,       # 4
    eError, eError, eError, eItsMe, eError, eError, eError,  # 5
    # Trailing entries beyond the last full row (kept to preserve length 48).
    eStart, eStart, eStart, eStart, eStart, eStart,
)
323 | |
# Strictly speaking, a class-6 character can be either 2 or 4 bytes long.
# The distinction is not needed here: the length is used for frequency
# analysis only, and each code range is validated there as well, so
# recording 2 for class 6 is safe.
GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2)

# Bundle of the GB2312 tables plus the encoding name.
GB2312SMModel = {
    'classTable': GB2312_cls,
    'classFactor': 7,  # classes 0-6 appear in GB2312_cls
    'stateTable': GB2312_st,
    'charLenTable': GB2312CharLenTable,
    'name': 'GB2312',
}
336 | |
337 # Shift_JIS | |
338 | |
# Byte-class table for Shift_JIS: entry i is the class assigned to byte
# value i.  Sixteen byte values per row; the trailing comment gives the
# row's range.
SJIS_cls = (
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 0, 0,  # 00 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 1, 1, 1, 1,  # 10 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 1,  # 70 - 7f
    3, 3, 3, 3, 3, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,  # 80 - 8f
    3, 3, 3, 3, 3, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,  # 90 - 9f
    # 0xa0 is illegal in the SJIS encoding, but some pages do contain that
    # byte, so the table is deliberately forgiving and gives it class 2.
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # a0 - af
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # b0 - bf
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # c0 - cf
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  # d0 - df
    3, 3, 3, 3, 3, 3, 3, 3,  3, 3, 3, 3, 3, 4, 4, 4,  # e0 - ef
    4, 4, 4, 4, 4, 4, 4, 4,  4, 4, 4, 4, 4, 0, 0, 0,  # f0 - ff
)
375 | |
376 | |
# State-transition table for Shift_JIS, written one row per current state
# and six columns per row (the model's classFactor).  Same flat sequence of
# 24 entries as before, only re-wrapped.
# NOTE(review): the row/column reading follows the layout documented on
# CP949_st (rows = previous state, columns = class) -- confirm against the
# state-machine code that consumes this table.
SJIS_st = (
    # cls=0  1       2       3       4       5
    eError, eStart, eStart, 3,      eError, eError,  # eStart
    eError, eError, eError, eError, eError, eError,  # eError
    eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe,  # eItsMe
    eError, eError, eStart, eStart, eStart, eStart,  # 3
)
382 | |
# Six entries, indexed by byte class; presumably the character length in
# bytes associated with a byte of that class -- confirm against the consumer.
SJISCharLenTable = (0, 1, 1, 2, 0, 0)

# Bundle of the Shift_JIS tables plus the encoding name.
SJISSMModel = {
    'classTable': SJIS_cls,
    'classFactor': 6,  # SJIS_cls itself only uses classes 0-4
    'stateTable': SJIS_st,
    'charLenTable': SJISCharLenTable,
    'name': 'Shift_JIS',
}
390 | |
391 # UCS2-BE | |
392 | |
# Byte-class table for UCS2-BE: entry i is the class assigned to byte
# value i.  Sixteen byte values per row; the trailing comment gives the
# row's range.  Only a handful of byte values get a non-zero class.
UCS2BE_cls = (
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 1, 0, 0, 2, 0, 0,  # 00 - 0f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 3, 0, 0, 0, 0,  # 10 - 1f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 3, 3, 3, 3, 3, 0, 0,  # 20 - 2f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 30 - 3f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 40 - 4f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 50 - 5f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 60 - 6f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 70 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 9f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # a0 - af
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # b0 - bf
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # c0 - cf
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # d0 - df
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # e0 - ef
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 4, 5,  # f0 - ff
)
427 | |
# State-transition table for UCS2-BE, written one row per current state and
# one column per byte class (6 classes).  Same flat sequence of 56 entries as
# before, only re-wrapped.
# NOTE(review): the row/column reading follows the layout documented on
# CP949_st (rows = previous state, columns = class) -- confirm against the
# state-machine code that consumes this table.
UCS2BE_st = (
    # cls=0  1       2       3       4       5
    5,      7,      7,      eError, 4,      3,       # eStart
    eError, eError, eError, eError, eError, eError,  # eError
    eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe,  # eItsMe
    6,      6,      6,      6,      eError, eError,  # 3
    6,      6,      6,      6,      6,      eItsMe,  # 4
    6,      6,      6,      6,      6,      6,       # 5
    5,      7,      7,      eError, 5,      8,       # 6
    6,      6,      eError, 6,      6,      6,       # 7
    6,      6,      6,      6,      eError, eError,  # 8
    # Trailing entries beyond the last full row (kept to preserve length 56).
    eStart, eStart,
)
437 | |
# One entry per UCS2-BE byte class (0-5); presumably the character length in
# bytes associated with a byte of that class -- confirm against the consumer.
UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)

# Bundle of the UCS2-BE tables; note the reported name is 'UTF-16BE'.
UCS2BESMModel = {
    'classTable': UCS2BE_cls,
    'classFactor': 6,  # classes 0-5 appear in UCS2BE_cls
    'stateTable': UCS2BE_st,
    'charLenTable': UCS2BECharLenTable,
    'name': 'UTF-16BE',
}
445 | |
446 # UCS2-LE | |
447 | |
# Byte-class table for UCS2-LE: entry i is the class assigned to byte
# value i.  Sixteen byte values per row; the trailing comment gives the
# row's range.  Only a handful of byte values get a non-zero class.
UCS2LE_cls = (
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 1, 0, 0, 2, 0, 0,  # 00 - 0f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 3, 0, 0, 0, 0,  # 10 - 1f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 3, 3, 3, 3, 3, 0, 0,  # 20 - 2f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 30 - 3f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 40 - 4f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 50 - 5f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 60 - 6f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 70 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 9f
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # a0 - af
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # b0 - bf
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # c0 - cf
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # d0 - df
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,  # e0 - ef
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 4, 5,  # f0 - ff
)
482 | |
# State-transition table for UCS2-LE, written one row per current state and
# one column per byte class (6 classes).  Same flat sequence of 56 entries as
# before, only re-wrapped.
# NOTE(review): the row/column reading follows the layout documented on
# CP949_st (rows = previous state, columns = class) -- confirm against the
# state-machine code that consumes this table.
UCS2LE_st = (
    # cls=0  1       2       3       4       5
    6,      6,      7,      6,      4,      3,       # eStart
    eError, eError, eError, eError, eError, eError,  # eError
    eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe,  # eItsMe
    5,      5,      5,      eError, eItsMe, eError,  # 3
    5,      5,      5,      eError, 5,      eError,  # 4
    6,      6,      7,      6,      8,      8,       # 5
    5,      5,      5,      eError, 5,      5,       # 6
    5,      eError, eError, eError, 5,      5,       # 7
    5,      5,      5,      eError, 5,      eError,  # 8
    # Trailing entries beyond the last full row (kept to preserve length 56).
    eStart, eStart,
)
492 | |
# One entry per UCS2-LE byte class (0-5); presumably the character length in
# bytes associated with a byte of that class -- confirm against the consumer.
UCS2LECharLenTable = (2, 2, 2, 2, 2, 2)

# Bundle of the UCS2-LE tables; note the reported name is 'UTF-16LE'.
UCS2LESMModel = {
    'classTable': UCS2LE_cls,
    'classFactor': 6,  # classes 0-5 appear in UCS2LE_cls
    'stateTable': UCS2LE_st,
    'charLenTable': UCS2LECharLenTable,
    'name': 'UTF-16LE',
}
500 | |
501 # UTF-8 | |
502 | |
# Byte-class table for UTF-8: entry i is the class assigned to byte value i.
# Sixteen byte values per row; the trailing comment gives the row's range.
UTF8_cls = (
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 0, 0,  # 00 - 0f (0x00 is allowed as a legal value)
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 1, 1, 1, 1,  # 10 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 3f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 40 - 4f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 50 - 5f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 60 - 6f
    1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,  # 70 - 7f
    2, 2, 2, 2, 3, 3, 3, 3,  4, 4, 4, 4, 4, 4, 4, 4,  # 80 - 8f
    4, 4, 4, 4, 4, 4, 4, 4,  4, 4, 4, 4, 4, 4, 4, 4,  # 90 - 9f
    5, 5, 5, 5, 5, 5, 5, 5,  5, 5, 5, 5, 5, 5, 5, 5,  # a0 - af
    5, 5, 5, 5, 5, 5, 5, 5,  5, 5, 5, 5, 5, 5, 5, 5,  # b0 - bf
    0, 0, 6, 6, 6, 6, 6, 6,  6, 6, 6, 6, 6, 6, 6, 6,  # c0 - cf
    6, 6, 6, 6, 6, 6, 6, 6,  6, 6, 6, 6, 6, 6, 6, 6,  # d0 - df
    7, 8, 8, 8, 8, 8, 8, 8,  8, 8, 8, 8, 8, 9, 8, 8,  # e0 - ef
    10, 11, 11, 11, 11, 11, 11, 11,  12, 13, 13, 13, 14, 15, 0, 0,  # f0 - ff
)
537 | |
# State-transition table for UTF-8, written one row per current state and
# one column per byte class (16 classes).  Same flat sequence of 208 entries
# as before; each pair of former 8-entry rows is joined into one row.
# NOTE(review): the row/column reading follows the layout documented on
# CP949_st (rows = previous state, columns = class) -- confirm against the
# state-machine code that consumes this table.
UTF8_st = (
    # cls=0  1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
    eError, eStart, eError, eError, eError, eError, 12,     10,     9,      11,     8,      7,      6,      5,      4,      3,       # eStart
    eError, eError, eError, eError, eError, eError, eError, eError, eError, eError, eError, eError, eError, eError, eError, eError,  # eError
    eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe,  # eItsMe
    eError, eError, 5,      5,      5,      5,      eError, eError, eError, eError, eError, eError, eError, eError, eError, eError,  # 3
    eError, eError, eError, 5,      5,      5,      eError, eError, eError, eError, eError, eError, eError, eError, eError, eError,  # 4
    eError, eError, 7,      7,      7,      7,      eError, eError, eError, eError, eError, eError, eError, eError, eError, eError,  # 5
    eError, eError, eError, eError, 7,      7,      eError, eError, eError, eError, eError, eError, eError, eError, eError, eError,  # 6
    eError, eError, 9,      9,      9,      9,      eError, eError, eError, eError, eError, eError, eError, eError, eError, eError,  # 7
    eError, eError, eError, eError, eError, 9,      eError, eError, eError, eError, eError, eError, eError, eError, eError, eError,  # 8
    eError, eError, 12,     12,     12,     12,     eError, eError, eError, eError, eError, eError, eError, eError, eError, eError,  # 9
    eError, eError, eError, eError, eError, 12,     eError, eError, eError, eError, eError, eError, eError, eError, eError, eError,  # 10
    eError, eError, 12,     12,     12,     eError, eError, eError, eError, eError, eError, eError, eError, eError, eError, eError,  # 11
    eError, eError, eStart, eStart, eStart, eStart, eError, eError, eError, eError, eError, eError, eError, eError, eError, eError,  # 12
)
566 | |
# One entry per UTF-8 byte class (0-15); presumably the character length in
# bytes associated with a byte of that class -- confirm against the consumer.
UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)

# Bundle of the UTF-8 tables plus the encoding name.
UTF8SMModel = {
    'classTable': UTF8_cls,
    'classFactor': 16,  # classes 0-15 appear in UTF8_cls
    'stateTable': UTF8_st,
    'charLenTable': UTF8CharLenTable,
    'name': 'UTF-8',
}
574 | |
575 # flake8: noqa | |
OLD | NEW |