| OLD | NEW | 
|---|---|
|  | (Empty) | 
| 1 ######################## BEGIN LICENSE BLOCK ######################## |  | 
| 2 # The Original Code is mozilla.org code. |  | 
| 3 # |  | 
| 4 # The Initial Developer of the Original Code is |  | 
| 5 # Netscape Communications Corporation. |  | 
| 6 # Portions created by the Initial Developer are Copyright (C) 1998 |  | 
| 7 # the Initial Developer. All Rights Reserved. |  | 
| 8 # |  | 
| 9 # Contributor(s): |  | 
| 10 #   Mark Pilgrim - port to Python |  | 
| 11 # |  | 
| 12 # This library is free software; you can redistribute it and/or |  | 
| 13 # modify it under the terms of the GNU Lesser General Public |  | 
| 14 # License as published by the Free Software Foundation; either |  | 
| 15 # version 2.1 of the License, or (at your option) any later version. |  | 
| 16 # |  | 
| 17 # This library is distributed in the hope that it will be useful, |  | 
| 18 # but WITHOUT ANY WARRANTY; without even the implied warranty of |  | 
| 19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU |  | 
| 20 # Lesser General Public License for more details. |  | 
| 21 # |  | 
| 22 # You should have received a copy of the GNU Lesser General Public |  | 
| 23 # License along with this library; if not, write to the Free Software |  | 
| 24 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |  | 
| 25 # 02110-1301  USA |  | 
| 26 ######################### END LICENSE BLOCK ######################### |  | 
| 27 |  | 
| 28 from .constants import eStart, eError, eItsMe |  | 
| 29 |  | 
| 30 # BIG5 |  | 
| 31 |  | 
| 32 BIG5_cls = ( |  | 
| 33     1,1,1,1,1,1,1,1,  # 00 - 07    #allow 0x00 as legal value |  | 
| 34     1,1,1,1,1,1,0,0,  # 08 - 0f |  | 
| 35     1,1,1,1,1,1,1,1,  # 10 - 17 |  | 
| 36     1,1,1,0,1,1,1,1,  # 18 - 1f |  | 
| 37     1,1,1,1,1,1,1,1,  # 20 - 27 |  | 
| 38     1,1,1,1,1,1,1,1,  # 28 - 2f |  | 
| 39     1,1,1,1,1,1,1,1,  # 30 - 37 |  | 
| 40     1,1,1,1,1,1,1,1,  # 38 - 3f |  | 
| 41     2,2,2,2,2,2,2,2,  # 40 - 47 |  | 
| 42     2,2,2,2,2,2,2,2,  # 48 - 4f |  | 
| 43     2,2,2,2,2,2,2,2,  # 50 - 57 |  | 
| 44     2,2,2,2,2,2,2,2,  # 58 - 5f |  | 
| 45     2,2,2,2,2,2,2,2,  # 60 - 67 |  | 
| 46     2,2,2,2,2,2,2,2,  # 68 - 6f |  | 
| 47     2,2,2,2,2,2,2,2,  # 70 - 77 |  | 
| 48     2,2,2,2,2,2,2,1,  # 78 - 7f |  | 
| 49     4,4,4,4,4,4,4,4,  # 80 - 87 |  | 
| 50     4,4,4,4,4,4,4,4,  # 88 - 8f |  | 
| 51     4,4,4,4,4,4,4,4,  # 90 - 97 |  | 
| 52     4,4,4,4,4,4,4,4,  # 98 - 9f |  | 
| 53     4,3,3,3,3,3,3,3,  # a0 - a7 |  | 
| 54     3,3,3,3,3,3,3,3,  # a8 - af |  | 
| 55     3,3,3,3,3,3,3,3,  # b0 - b7 |  | 
| 56     3,3,3,3,3,3,3,3,  # b8 - bf |  | 
| 57     3,3,3,3,3,3,3,3,  # c0 - c7 |  | 
| 58     3,3,3,3,3,3,3,3,  # c8 - cf |  | 
| 59     3,3,3,3,3,3,3,3,  # d0 - d7 |  | 
| 60     3,3,3,3,3,3,3,3,  # d8 - df |  | 
| 61     3,3,3,3,3,3,3,3,  # e0 - e7 |  | 
| 62     3,3,3,3,3,3,3,3,  # e8 - ef |  | 
| 63     3,3,3,3,3,3,3,3,  # f0 - f7 |  | 
| 64     3,3,3,3,3,3,3,0  # f8 - ff |  | 
| 65 ) |  | 
| 66 |  | 
| 67 BIG5_st = ( |  | 
| 68     eError,eStart,eStart,     3,eError,eError,eError,eError,#00-07 |  | 
| 69     eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f |  | 
| 70     eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17 |  | 
| 71 ) |  | 
| 72 |  | 
| 73 Big5CharLenTable = (0, 1, 1, 2, 0) |  | 
| 74 |  | 
| 75 Big5SMModel = {'classTable': BIG5_cls, |  | 
| 76                'classFactor': 5, |  | 
| 77                'stateTable': BIG5_st, |  | 
| 78                'charLenTable': Big5CharLenTable, |  | 
| 79                'name': 'Big5'} |  | 
| 80 |  | 
| 81 # CP949 |  | 
| 82 |  | 
| 83 CP949_cls  = ( |  | 
| 84     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0,  # 00 - 0f |  | 
| 85     1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1,  # 10 - 1f |  | 
| 86     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,  # 20 - 2f |  | 
| 87     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,  # 30 - 3f |  | 
| 88     1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4,  # 40 - 4f |  | 
| 89     4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1,  # 50 - 5f |  | 
| 90     1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,  # 60 - 6f |  | 
| 91     5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1,  # 70 - 7f |  | 
| 92     0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,  # 80 - 8f |  | 
| 93     6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,  # 90 - 9f |  | 
| 94     6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8,  # a0 - af |  | 
| 95     7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,  # b0 - bf |  | 
| 96     7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2,  # c0 - cf |  | 
| 97     2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,  # d0 - df |  | 
| 98     2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,  # e0 - ef |  | 
| 99     2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0,  # f0 - ff |  | 
| 100 ) |  | 
| 101 |  | 
| 102 CP949_st = ( |  | 
| 103 #cls=    0      1      2      3      4      5      6      7      8      9  # previous state = |  | 
| 104     eError,eStart,     3,eError,eStart,eStart,     4,     5,eError,     6, # eStart |  | 
| 105     eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError |  | 
| 106     eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe |  | 
| 107     eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3 |  | 
| 108     eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4 |  | 
| 109     eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5 |  | 
| 110     eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6 |  | 
| 111 ) |  | 
| 112 |  | 
| 113 CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2) |  | 
| 114 |  | 
| 115 CP949SMModel = {'classTable': CP949_cls, |  | 
| 116                 'classFactor': 10, |  | 
| 117                 'stateTable': CP949_st, |  | 
| 118                 'charLenTable': CP949CharLenTable, |  | 
| 119                 'name': 'CP949'} |  | 
| 120 |  | 
| 121 # EUC-JP |  | 
| 122 |  | 
| 123 EUCJP_cls = ( |  | 
| 124     4,4,4,4,4,4,4,4,  # 00 - 07 |  | 
| 125     4,4,4,4,4,4,5,5,  # 08 - 0f |  | 
| 126     4,4,4,4,4,4,4,4,  # 10 - 17 |  | 
| 127     4,4,4,5,4,4,4,4,  # 18 - 1f |  | 
| 128     4,4,4,4,4,4,4,4,  # 20 - 27 |  | 
| 129     4,4,4,4,4,4,4,4,  # 28 - 2f |  | 
| 130     4,4,4,4,4,4,4,4,  # 30 - 37 |  | 
| 131     4,4,4,4,4,4,4,4,  # 38 - 3f |  | 
| 132     4,4,4,4,4,4,4,4,  # 40 - 47 |  | 
| 133     4,4,4,4,4,4,4,4,  # 48 - 4f |  | 
| 134     4,4,4,4,4,4,4,4,  # 50 - 57 |  | 
| 135     4,4,4,4,4,4,4,4,  # 58 - 5f |  | 
| 136     4,4,4,4,4,4,4,4,  # 60 - 67 |  | 
| 137     4,4,4,4,4,4,4,4,  # 68 - 6f |  | 
| 138     4,4,4,4,4,4,4,4,  # 70 - 77 |  | 
| 139     4,4,4,4,4,4,4,4,  # 78 - 7f |  | 
| 140     5,5,5,5,5,5,5,5,  # 80 - 87 |  | 
| 141     5,5,5,5,5,5,1,3,  # 88 - 8f |  | 
| 142     5,5,5,5,5,5,5,5,  # 90 - 97 |  | 
| 143     5,5,5,5,5,5,5,5,  # 98 - 9f |  | 
| 144     5,2,2,2,2,2,2,2,  # a0 - a7 |  | 
| 145     2,2,2,2,2,2,2,2,  # a8 - af |  | 
| 146     2,2,2,2,2,2,2,2,  # b0 - b7 |  | 
| 147     2,2,2,2,2,2,2,2,  # b8 - bf |  | 
| 148     2,2,2,2,2,2,2,2,  # c0 - c7 |  | 
| 149     2,2,2,2,2,2,2,2,  # c8 - cf |  | 
| 150     2,2,2,2,2,2,2,2,  # d0 - d7 |  | 
| 151     2,2,2,2,2,2,2,2,  # d8 - df |  | 
| 152     0,0,0,0,0,0,0,0,  # e0 - e7 |  | 
| 153     0,0,0,0,0,0,0,0,  # e8 - ef |  | 
| 154     0,0,0,0,0,0,0,0,  # f0 - f7 |  | 
| 155     0,0,0,0,0,0,0,5  # f8 - ff |  | 
| 156 ) |  | 
| 157 |  | 
| 158 EUCJP_st = ( |  | 
| 159           3,     4,     3,     5,eStart,eError,eError,eError,#00-07 |  | 
| 160      eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f |  | 
| 161      eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17 |  | 
| 162      eError,eError,eStart,eError,eError,eError,     3,eError,#18-1f |  | 
| 163           3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27 |  | 
| 164 ) |  | 
| 165 |  | 
| 166 EUCJPCharLenTable = (2, 2, 2, 3, 1, 0) |  | 
| 167 |  | 
| 168 EUCJPSMModel = {'classTable': EUCJP_cls, |  | 
| 169                 'classFactor': 6, |  | 
| 170                 'stateTable': EUCJP_st, |  | 
| 171                 'charLenTable': EUCJPCharLenTable, |  | 
| 172                 'name': 'EUC-JP'} |  | 
| 173 |  | 
| 174 # EUC-KR |  | 
| 175 |  | 
| 176 EUCKR_cls  = ( |  | 
| 177     1,1,1,1,1,1,1,1,  # 00 - 07 |  | 
| 178     1,1,1,1,1,1,0,0,  # 08 - 0f |  | 
| 179     1,1,1,1,1,1,1,1,  # 10 - 17 |  | 
| 180     1,1,1,0,1,1,1,1,  # 18 - 1f |  | 
| 181     1,1,1,1,1,1,1,1,  # 20 - 27 |  | 
| 182     1,1,1,1,1,1,1,1,  # 28 - 2f |  | 
| 183     1,1,1,1,1,1,1,1,  # 30 - 37 |  | 
| 184     1,1,1,1,1,1,1,1,  # 38 - 3f |  | 
| 185     1,1,1,1,1,1,1,1,  # 40 - 47 |  | 
| 186     1,1,1,1,1,1,1,1,  # 48 - 4f |  | 
| 187     1,1,1,1,1,1,1,1,  # 50 - 57 |  | 
| 188     1,1,1,1,1,1,1,1,  # 58 - 5f |  | 
| 189     1,1,1,1,1,1,1,1,  # 60 - 67 |  | 
| 190     1,1,1,1,1,1,1,1,  # 68 - 6f |  | 
| 191     1,1,1,1,1,1,1,1,  # 70 - 77 |  | 
| 192     1,1,1,1,1,1,1,1,  # 78 - 7f |  | 
| 193     0,0,0,0,0,0,0,0,  # 80 - 87 |  | 
| 194     0,0,0,0,0,0,0,0,  # 88 - 8f |  | 
| 195     0,0,0,0,0,0,0,0,  # 90 - 97 |  | 
| 196     0,0,0,0,0,0,0,0,  # 98 - 9f |  | 
| 197     0,2,2,2,2,2,2,2,  # a0 - a7 |  | 
| 198     2,2,2,2,2,3,3,3,  # a8 - af |  | 
| 199     2,2,2,2,2,2,2,2,  # b0 - b7 |  | 
| 200     2,2,2,2,2,2,2,2,  # b8 - bf |  | 
| 201     2,2,2,2,2,2,2,2,  # c0 - c7 |  | 
| 202     2,3,2,2,2,2,2,2,  # c8 - cf |  | 
| 203     2,2,2,2,2,2,2,2,  # d0 - d7 |  | 
| 204     2,2,2,2,2,2,2,2,  # d8 - df |  | 
| 205     2,2,2,2,2,2,2,2,  # e0 - e7 |  | 
| 206     2,2,2,2,2,2,2,2,  # e8 - ef |  | 
| 207     2,2,2,2,2,2,2,2,  # f0 - f7 |  | 
| 208     2,2,2,2,2,2,2,0   # f8 - ff |  | 
| 209 ) |  | 
| 210 |  | 
| 211 EUCKR_st = ( |  | 
| 212     eError,eStart,     3,eError,eError,eError,eError,eError,#00-07 |  | 
| 213     eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f |  | 
| 214 ) |  | 
| 215 |  | 
| 216 EUCKRCharLenTable = (0, 1, 2, 0) |  | 
| 217 |  | 
| 218 EUCKRSMModel = {'classTable': EUCKR_cls, |  | 
| 219                 'classFactor': 4, |  | 
| 220                 'stateTable': EUCKR_st, |  | 
| 221                 'charLenTable': EUCKRCharLenTable, |  | 
| 222                 'name': 'EUC-KR'} |  | 
| 223 |  | 
| 224 # EUC-TW |  | 
| 225 |  | 
| 226 EUCTW_cls = ( |  | 
| 227     2,2,2,2,2,2,2,2,  # 00 - 07 |  | 
| 228     2,2,2,2,2,2,0,0,  # 08 - 0f |  | 
| 229     2,2,2,2,2,2,2,2,  # 10 - 17 |  | 
| 230     2,2,2,0,2,2,2,2,  # 18 - 1f |  | 
| 231     2,2,2,2,2,2,2,2,  # 20 - 27 |  | 
| 232     2,2,2,2,2,2,2,2,  # 28 - 2f |  | 
| 233     2,2,2,2,2,2,2,2,  # 30 - 37 |  | 
| 234     2,2,2,2,2,2,2,2,  # 38 - 3f |  | 
| 235     2,2,2,2,2,2,2,2,  # 40 - 47 |  | 
| 236     2,2,2,2,2,2,2,2,  # 48 - 4f |  | 
| 237     2,2,2,2,2,2,2,2,  # 50 - 57 |  | 
| 238     2,2,2,2,2,2,2,2,  # 58 - 5f |  | 
| 239     2,2,2,2,2,2,2,2,  # 60 - 67 |  | 
| 240     2,2,2,2,2,2,2,2,  # 68 - 6f |  | 
| 241     2,2,2,2,2,2,2,2,  # 70 - 77 |  | 
| 242     2,2,2,2,2,2,2,2,  # 78 - 7f |  | 
| 243     0,0,0,0,0,0,0,0,  # 80 - 87 |  | 
| 244     0,0,0,0,0,0,6,0,  # 88 - 8f |  | 
| 245     0,0,0,0,0,0,0,0,  # 90 - 97 |  | 
| 246     0,0,0,0,0,0,0,0,  # 98 - 9f |  | 
| 247     0,3,4,4,4,4,4,4,  # a0 - a7 |  | 
| 248     5,5,1,1,1,1,1,1,  # a8 - af |  | 
| 249     1,1,1,1,1,1,1,1,  # b0 - b7 |  | 
| 250     1,1,1,1,1,1,1,1,  # b8 - bf |  | 
| 251     1,1,3,1,3,3,3,3,  # c0 - c7 |  | 
| 252     3,3,3,3,3,3,3,3,  # c8 - cf |  | 
| 253     3,3,3,3,3,3,3,3,  # d0 - d7 |  | 
| 254     3,3,3,3,3,3,3,3,  # d8 - df |  | 
| 255     3,3,3,3,3,3,3,3,  # e0 - e7 |  | 
| 256     3,3,3,3,3,3,3,3,  # e8 - ef |  | 
| 257     3,3,3,3,3,3,3,3,  # f0 - f7 |  | 
| 258     3,3,3,3,3,3,3,0   # f8 - ff |  | 
| 259 ) |  | 
| 260 |  | 
| 261 EUCTW_st = ( |  | 
| 262     eError,eError,eStart,     3,     3,     3,     4,eError,#00-07 |  | 
| 263     eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f |  | 
| 264     eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17 |  | 
| 265     eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f |  | 
| 266          5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27 |  | 
| 267     eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f |  | 
| 268 ) |  | 
| 269 |  | 
| 270 EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3) |  | 
| 271 |  | 
| 272 EUCTWSMModel = {'classTable': EUCTW_cls, |  | 
| 273                 'classFactor': 7, |  | 
| 274                 'stateTable': EUCTW_st, |  | 
| 275                 'charLenTable': EUCTWCharLenTable, |  | 
| 276                 'name': 'x-euc-tw'} |  | 
| 277 |  | 
| 278 # GB2312 |  | 
| 279 |  | 
| 280 GB2312_cls = ( |  | 
| 281     1,1,1,1,1,1,1,1,  # 00 - 07 |  | 
| 282     1,1,1,1,1,1,0,0,  # 08 - 0f |  | 
| 283     1,1,1,1,1,1,1,1,  # 10 - 17 |  | 
| 284     1,1,1,0,1,1,1,1,  # 18 - 1f |  | 
| 285     1,1,1,1,1,1,1,1,  # 20 - 27 |  | 
| 286     1,1,1,1,1,1,1,1,  # 28 - 2f |  | 
| 287     3,3,3,3,3,3,3,3,  # 30 - 37 |  | 
| 288     3,3,1,1,1,1,1,1,  # 38 - 3f |  | 
| 289     2,2,2,2,2,2,2,2,  # 40 - 47 |  | 
| 290     2,2,2,2,2,2,2,2,  # 48 - 4f |  | 
| 291     2,2,2,2,2,2,2,2,  # 50 - 57 |  | 
| 292     2,2,2,2,2,2,2,2,  # 58 - 5f |  | 
| 293     2,2,2,2,2,2,2,2,  # 60 - 67 |  | 
| 294     2,2,2,2,2,2,2,2,  # 68 - 6f |  | 
| 295     2,2,2,2,2,2,2,2,  # 70 - 77 |  | 
| 296     2,2,2,2,2,2,2,4,  # 78 - 7f |  | 
| 297     5,6,6,6,6,6,6,6,  # 80 - 87 |  | 
| 298     6,6,6,6,6,6,6,6,  # 88 - 8f |  | 
| 299     6,6,6,6,6,6,6,6,  # 90 - 97 |  | 
| 300     6,6,6,6,6,6,6,6,  # 98 - 9f |  | 
| 301     6,6,6,6,6,6,6,6,  # a0 - a7 |  | 
| 302     6,6,6,6,6,6,6,6,  # a8 - af |  | 
| 303     6,6,6,6,6,6,6,6,  # b0 - b7 |  | 
| 304     6,6,6,6,6,6,6,6,  # b8 - bf |  | 
| 305     6,6,6,6,6,6,6,6,  # c0 - c7 |  | 
| 306     6,6,6,6,6,6,6,6,  # c8 - cf |  | 
| 307     6,6,6,6,6,6,6,6,  # d0 - d7 |  | 
| 308     6,6,6,6,6,6,6,6,  # d8 - df |  | 
| 309     6,6,6,6,6,6,6,6,  # e0 - e7 |  | 
| 310     6,6,6,6,6,6,6,6,  # e8 - ef |  | 
| 311     6,6,6,6,6,6,6,6,  # f0 - f7 |  | 
| 312     6,6,6,6,6,6,6,0   # f8 - ff |  | 
| 313 ) |  | 
| 314 |  | 
| 315 GB2312_st = ( |  | 
| 316     eError,eStart,eStart,eStart,eStart,eStart,     3,eError,#00-07 |  | 
| 317     eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f |  | 
| 318     eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17 |  | 
| 319          4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f |  | 
| 320     eError,eError,     5,eError,eError,eError,eItsMe,eError,#20-27 |  | 
| 321     eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f |  | 
| 322 ) |  | 
| 323 |  | 
| 324 # To be accurate, the length of class 6 can be either 2 or 4. |  | 
| 325 # But it is not necessary to discriminate between the two since |  | 
| 326 # it is used for frequency analysis only, and we are validating |  | 
| 327 # each code range there as well. So it is safe to set it to be |  | 
| 328 # 2 here. |  | 
| 329 GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2) |  | 
| 330 |  | 
| 331 GB2312SMModel = {'classTable': GB2312_cls, |  | 
| 332                   'classFactor': 7, |  | 
| 333                   'stateTable': GB2312_st, |  | 
| 334                   'charLenTable': GB2312CharLenTable, |  | 
| 335                   'name': 'GB2312'} |  | 
| 336 |  | 
| 337 # Shift_JIS |  | 
| 338 |  | 
| 339 SJIS_cls = ( |  | 
| 340     1,1,1,1,1,1,1,1,  # 00 - 07 |  | 
| 341     1,1,1,1,1,1,0,0,  # 08 - 0f |  | 
| 342     1,1,1,1,1,1,1,1,  # 10 - 17 |  | 
| 343     1,1,1,0,1,1,1,1,  # 18 - 1f |  | 
| 344     1,1,1,1,1,1,1,1,  # 20 - 27 |  | 
| 345     1,1,1,1,1,1,1,1,  # 28 - 2f |  | 
| 346     1,1,1,1,1,1,1,1,  # 30 - 37 |  | 
| 347     1,1,1,1,1,1,1,1,  # 38 - 3f |  | 
| 348     2,2,2,2,2,2,2,2,  # 40 - 47 |  | 
| 349     2,2,2,2,2,2,2,2,  # 48 - 4f |  | 
| 350     2,2,2,2,2,2,2,2,  # 50 - 57 |  | 
| 351     2,2,2,2,2,2,2,2,  # 58 - 5f |  | 
| 352     2,2,2,2,2,2,2,2,  # 60 - 67 |  | 
| 353     2,2,2,2,2,2,2,2,  # 68 - 6f |  | 
| 354     2,2,2,2,2,2,2,2,  # 70 - 77 |  | 
| 355     2,2,2,2,2,2,2,1,  # 78 - 7f |  | 
| 356     3,3,3,3,3,3,3,3,  # 80 - 87 |  | 
| 357     3,3,3,3,3,3,3,3,  # 88 - 8f |  | 
| 358     3,3,3,3,3,3,3,3,  # 90 - 97 |  | 
| 359     3,3,3,3,3,3,3,3,  # 98 - 9f |  | 
| 360     #0xa0 is illegal in sjis encoding, but some pages do |  | 
| 361     #contain such a byte. We need to be more forgiving of errors. |  | 
| 362     2,2,2,2,2,2,2,2,  # a0 - a7 |  | 
| 363     2,2,2,2,2,2,2,2,  # a8 - af |  | 
| 364     2,2,2,2,2,2,2,2,  # b0 - b7 |  | 
| 365     2,2,2,2,2,2,2,2,  # b8 - bf |  | 
| 366     2,2,2,2,2,2,2,2,  # c0 - c7 |  | 
| 367     2,2,2,2,2,2,2,2,  # c8 - cf |  | 
| 368     2,2,2,2,2,2,2,2,  # d0 - d7 |  | 
| 369     2,2,2,2,2,2,2,2,  # d8 - df |  | 
| 370     3,3,3,3,3,3,3,3,  # e0 - e7 |  | 
| 371     3,3,3,3,3,4,4,4,  # e8 - ef |  | 
| 372     4,4,4,4,4,4,4,4,  # f0 - f7 |  | 
| 373     4,4,4,4,4,0,0,0   # f8 - ff |  | 
| 374 ) |  | 
| 375 |  | 
| 376 |  | 
| 377 SJIS_st = ( |  | 
| 378     eError,eStart,eStart,     3,eError,eError,eError,eError,#00-07 |  | 
| 379     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f |  | 
| 380     eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17 |  | 
| 381 ) |  | 
| 382 |  | 
| 383 SJISCharLenTable = (0, 1, 1, 2, 0, 0) |  | 
| 384 |  | 
| 385 SJISSMModel = {'classTable': SJIS_cls, |  | 
| 386                'classFactor': 6, |  | 
| 387                'stateTable': SJIS_st, |  | 
| 388                'charLenTable': SJISCharLenTable, |  | 
| 389                'name': 'Shift_JIS'} |  | 
| 390 |  | 
| 391 # UCS2-BE |  | 
| 392 |  | 
| 393 UCS2BE_cls = ( |  | 
| 394     0,0,0,0,0,0,0,0,  # 00 - 07 |  | 
| 395     0,0,1,0,0,2,0,0,  # 08 - 0f |  | 
| 396     0,0,0,0,0,0,0,0,  # 10 - 17 |  | 
| 397     0,0,0,3,0,0,0,0,  # 18 - 1f |  | 
| 398     0,0,0,0,0,0,0,0,  # 20 - 27 |  | 
| 399     0,3,3,3,3,3,0,0,  # 28 - 2f |  | 
| 400     0,0,0,0,0,0,0,0,  # 30 - 37 |  | 
| 401     0,0,0,0,0,0,0,0,  # 38 - 3f |  | 
| 402     0,0,0,0,0,0,0,0,  # 40 - 47 |  | 
| 403     0,0,0,0,0,0,0,0,  # 48 - 4f |  | 
| 404     0,0,0,0,0,0,0,0,  # 50 - 57 |  | 
| 405     0,0,0,0,0,0,0,0,  # 58 - 5f |  | 
| 406     0,0,0,0,0,0,0,0,  # 60 - 67 |  | 
| 407     0,0,0,0,0,0,0,0,  # 68 - 6f |  | 
| 408     0,0,0,0,0,0,0,0,  # 70 - 77 |  | 
| 409     0,0,0,0,0,0,0,0,  # 78 - 7f |  | 
| 410     0,0,0,0,0,0,0,0,  # 80 - 87 |  | 
| 411     0,0,0,0,0,0,0,0,  # 88 - 8f |  | 
| 412     0,0,0,0,0,0,0,0,  # 90 - 97 |  | 
| 413     0,0,0,0,0,0,0,0,  # 98 - 9f |  | 
| 414     0,0,0,0,0,0,0,0,  # a0 - a7 |  | 
| 415     0,0,0,0,0,0,0,0,  # a8 - af |  | 
| 416     0,0,0,0,0,0,0,0,  # b0 - b7 |  | 
| 417     0,0,0,0,0,0,0,0,  # b8 - bf |  | 
| 418     0,0,0,0,0,0,0,0,  # c0 - c7 |  | 
| 419     0,0,0,0,0,0,0,0,  # c8 - cf |  | 
| 420     0,0,0,0,0,0,0,0,  # d0 - d7 |  | 
| 421     0,0,0,0,0,0,0,0,  # d8 - df |  | 
| 422     0,0,0,0,0,0,0,0,  # e0 - e7 |  | 
| 423     0,0,0,0,0,0,0,0,  # e8 - ef |  | 
| 424     0,0,0,0,0,0,0,0,  # f0 - f7 |  | 
| 425     0,0,0,0,0,0,4,5   # f8 - ff |  | 
| 426 ) |  | 
| 427 |  | 
| 428 UCS2BE_st  = ( |  | 
| 429           5,     7,     7,eError,     4,     3,eError,eError,#00-07 |  | 
| 430      eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f |  | 
| 431      eItsMe,eItsMe,     6,     6,     6,     6,eError,eError,#10-17 |  | 
| 432           6,     6,     6,     6,     6,eItsMe,     6,     6,#18-1f |  | 
| 433           6,     6,     6,     6,     5,     7,     7,eError,#20-27 |  | 
| 434           5,     8,     6,     6,eError,     6,     6,     6,#28-2f |  | 
| 435           6,     6,     6,     6,eError,eError,eStart,eStart #30-37 |  | 
| 436 ) |  | 
| 437 |  | 
| 438 UCS2BECharLenTable = (2, 2, 2, 0, 2, 2) |  | 
| 439 |  | 
| 440 UCS2BESMModel = {'classTable': UCS2BE_cls, |  | 
| 441                  'classFactor': 6, |  | 
| 442                  'stateTable': UCS2BE_st, |  | 
| 443                  'charLenTable': UCS2BECharLenTable, |  | 
| 444                  'name': 'UTF-16BE'} |  | 
| 445 |  | 
| 446 # UCS2-LE |  | 
| 447 |  | 
| 448 UCS2LE_cls = ( |  | 
| 449     0,0,0,0,0,0,0,0,  # 00 - 07 |  | 
| 450     0,0,1,0,0,2,0,0,  # 08 - 0f |  | 
| 451     0,0,0,0,0,0,0,0,  # 10 - 17 |  | 
| 452     0,0,0,3,0,0,0,0,  # 18 - 1f |  | 
| 453     0,0,0,0,0,0,0,0,  # 20 - 27 |  | 
| 454     0,3,3,3,3,3,0,0,  # 28 - 2f |  | 
| 455     0,0,0,0,0,0,0,0,  # 30 - 37 |  | 
| 456     0,0,0,0,0,0,0,0,  # 38 - 3f |  | 
| 457     0,0,0,0,0,0,0,0,  # 40 - 47 |  | 
| 458     0,0,0,0,0,0,0,0,  # 48 - 4f |  | 
| 459     0,0,0,0,0,0,0,0,  # 50 - 57 |  | 
| 460     0,0,0,0,0,0,0,0,  # 58 - 5f |  | 
| 461     0,0,0,0,0,0,0,0,  # 60 - 67 |  | 
| 462     0,0,0,0,0,0,0,0,  # 68 - 6f |  | 
| 463     0,0,0,0,0,0,0,0,  # 70 - 77 |  | 
| 464     0,0,0,0,0,0,0,0,  # 78 - 7f |  | 
| 465     0,0,0,0,0,0,0,0,  # 80 - 87 |  | 
| 466     0,0,0,0,0,0,0,0,  # 88 - 8f |  | 
| 467     0,0,0,0,0,0,0,0,  # 90 - 97 |  | 
| 468     0,0,0,0,0,0,0,0,  # 98 - 9f |  | 
| 469     0,0,0,0,0,0,0,0,  # a0 - a7 |  | 
| 470     0,0,0,0,0,0,0,0,  # a8 - af |  | 
| 471     0,0,0,0,0,0,0,0,  # b0 - b7 |  | 
| 472     0,0,0,0,0,0,0,0,  # b8 - bf |  | 
| 473     0,0,0,0,0,0,0,0,  # c0 - c7 |  | 
| 474     0,0,0,0,0,0,0,0,  # c8 - cf |  | 
| 475     0,0,0,0,0,0,0,0,  # d0 - d7 |  | 
| 476     0,0,0,0,0,0,0,0,  # d8 - df |  | 
| 477     0,0,0,0,0,0,0,0,  # e0 - e7 |  | 
| 478     0,0,0,0,0,0,0,0,  # e8 - ef |  | 
| 479     0,0,0,0,0,0,0,0,  # f0 - f7 |  | 
| 480     0,0,0,0,0,0,4,5   # f8 - ff |  | 
| 481 ) |  | 
| 482 |  | 
| 483 UCS2LE_st = ( |  | 
| 484           6,     6,     7,     6,     4,     3,eError,eError,#00-07 |  | 
| 485      eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f |  | 
| 486      eItsMe,eItsMe,     5,     5,     5,eError,eItsMe,eError,#10-17 |  | 
| 487           5,     5,     5,eError,     5,eError,     6,     6,#18-1f |  | 
| 488           7,     6,     8,     8,     5,     5,     5,eError,#20-27 |  | 
| 489           5,     5,     5,eError,eError,eError,     5,     5,#28-2f |  | 
| 490           5,     5,     5,eError,     5,eError,eStart,eStart #30-37 |  | 
| 491 ) |  | 
| 492 |  | 
| 493 UCS2LECharLenTable = (2, 2, 2, 2, 2, 2) |  | 
| 494 |  | 
| 495 UCS2LESMModel = {'classTable': UCS2LE_cls, |  | 
| 496                  'classFactor': 6, |  | 
| 497                  'stateTable': UCS2LE_st, |  | 
| 498                  'charLenTable': UCS2LECharLenTable, |  | 
| 499                  'name': 'UTF-16LE'} |  | 
| 500 |  | 
| 501 # UTF-8 |  | 
| 502 |  | 
| 503 UTF8_cls = ( |  | 
| 504     1,1,1,1,1,1,1,1,  # 00 - 07  #allow 0x00 as a legal value |  | 
| 505     1,1,1,1,1,1,0,0,  # 08 - 0f |  | 
| 506     1,1,1,1,1,1,1,1,  # 10 - 17 |  | 
| 507     1,1,1,0,1,1,1,1,  # 18 - 1f |  | 
| 508     1,1,1,1,1,1,1,1,  # 20 - 27 |  | 
| 509     1,1,1,1,1,1,1,1,  # 28 - 2f |  | 
| 510     1,1,1,1,1,1,1,1,  # 30 - 37 |  | 
| 511     1,1,1,1,1,1,1,1,  # 38 - 3f |  | 
| 512     1,1,1,1,1,1,1,1,  # 40 - 47 |  | 
| 513     1,1,1,1,1,1,1,1,  # 48 - 4f |  | 
| 514     1,1,1,1,1,1,1,1,  # 50 - 57 |  | 
| 515     1,1,1,1,1,1,1,1,  # 58 - 5f |  | 
| 516     1,1,1,1,1,1,1,1,  # 60 - 67 |  | 
| 517     1,1,1,1,1,1,1,1,  # 68 - 6f |  | 
| 518     1,1,1,1,1,1,1,1,  # 70 - 77 |  | 
| 519     1,1,1,1,1,1,1,1,  # 78 - 7f |  | 
| 520     2,2,2,2,3,3,3,3,  # 80 - 87 |  | 
| 521     4,4,4,4,4,4,4,4,  # 88 - 8f |  | 
| 522     4,4,4,4,4,4,4,4,  # 90 - 97 |  | 
| 523     4,4,4,4,4,4,4,4,  # 98 - 9f |  | 
| 524     5,5,5,5,5,5,5,5,  # a0 - a7 |  | 
| 525     5,5,5,5,5,5,5,5,  # a8 - af |  | 
| 526     5,5,5,5,5,5,5,5,  # b0 - b7 |  | 
| 527     5,5,5,5,5,5,5,5,  # b8 - bf |  | 
| 528     0,0,6,6,6,6,6,6,  # c0 - c7 |  | 
| 529     6,6,6,6,6,6,6,6,  # c8 - cf |  | 
| 530     6,6,6,6,6,6,6,6,  # d0 - d7 |  | 
| 531     6,6,6,6,6,6,6,6,  # d8 - df |  | 
| 532     7,8,8,8,8,8,8,8,  # e0 - e7 |  | 
| 533     8,8,8,8,8,9,8,8,  # e8 - ef |  | 
| 534     10,11,11,11,11,11,11,11,  # f0 - f7 |  | 
| 535     12,13,13,13,14,15,0,0    # f8 - ff |  | 
| 536 ) |  | 
| 537 |  | 
| 538 UTF8_st = ( |  | 
| 539     eError,eStart,eError,eError,eError,eError,     12,   10,#00-07 |  | 
| 540          9,     11,     8,     7,     6,     5,     4,    3,#08-0f |  | 
| 541     eError,eError,eError,eError,eError,eError,eError,eError,#10-17 |  | 
| 542     eError,eError,eError,eError,eError,eError,eError,eError,#18-1f |  | 
| 543     eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27 |  | 
| 544     eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f |  | 
| 545     eError,eError,     5,     5,     5,     5,eError,eError,#30-37 |  | 
| 546     eError,eError,eError,eError,eError,eError,eError,eError,#38-3f |  | 
| 547     eError,eError,eError,     5,     5,     5,eError,eError,#40-47 |  | 
| 548     eError,eError,eError,eError,eError,eError,eError,eError,#48-4f |  | 
| 549     eError,eError,     7,     7,     7,     7,eError,eError,#50-57 |  | 
| 550     eError,eError,eError,eError,eError,eError,eError,eError,#58-5f |  | 
| 551     eError,eError,eError,eError,     7,     7,eError,eError,#60-67 |  | 
| 552     eError,eError,eError,eError,eError,eError,eError,eError,#68-6f |  | 
| 553     eError,eError,     9,     9,     9,     9,eError,eError,#70-77 |  | 
| 554     eError,eError,eError,eError,eError,eError,eError,eError,#78-7f |  | 
| 555     eError,eError,eError,eError,eError,     9,eError,eError,#80-87 |  | 
| 556     eError,eError,eError,eError,eError,eError,eError,eError,#88-8f |  | 
| 557     eError,eError,    12,    12,    12,    12,eError,eError,#90-97 |  | 
| 558     eError,eError,eError,eError,eError,eError,eError,eError,#98-9f |  | 
| 559     eError,eError,eError,eError,eError,    12,eError,eError,#a0-a7 |  | 
| 560     eError,eError,eError,eError,eError,eError,eError,eError,#a8-af |  | 
| 561     eError,eError,    12,    12,    12,eError,eError,eError,#b0-b7 |  | 
| 562     eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf |  | 
| 563     eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7 |  | 
| 564     eError,eError,eError,eError,eError,eError,eError,eError #c8-cf |  | 
| 565 ) |  | 
| 566 |  | 
| 567 UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) |  | 
| 568 |  | 
| 569 UTF8SMModel = {'classTable': UTF8_cls, |  | 
| 570                'classFactor': 16, |  | 
| 571                'stateTable': UTF8_st, |  | 
| 572                'charLenTable': UTF8CharLenTable, |  | 
| 573                'name': 'UTF-8'} |  | 
| 574 |  | 
| 575 # flake8: noqa |  | 
| OLD | NEW | 
|---|---|