| Index: source/data/mappings/big5-html.ucm
|
| diff --git a/source/data/mappings/big5-html.ucm b/source/data/mappings/big5-html.ucm
|
| index bf0f5f7139cc7e3b466f0f91707aeeab968f2429..d17cbf5395eb353ca8c57558477182a999ca7347 100644
|
| --- a/source/data/mappings/big5-html.ucm
|
| +++ b/source/data/mappings/big5-html.ucm
|
| @@ -16,10 +16,22 @@
|
| <icu:charsetFamily> "ASCII"
|
|
|
| # 'p' is for the range that may produce non-BMP code points.
|
| +# 'i' marks the code range as illegal.
|
| +# Big5 has a lot of small holes in the 2nd byte. If a hole is in the ASCII range,
|
| +# the 2nd byte has to be added back to the stream to be compliant with the
|
| +# encoding spec. Each state adds 1kB to the data size.
|
| # See http://userguide.icu-project.org/conversion/data.
|
| -<icu:state> 0-7f, 87-fe:1, 87-a0:2, c8:2, fa-fe:2
|
| +<icu:state> 0-7f, a1-fe:1, 87-a0:2, c8:2, fa-fe:2, 87:3, 89:4, 8a:5, 8b:6, 8d:7, 9b:8, 9f:9, a0:a
|
| <icu:state> 40-7e, a1-fe
|
| <icu:state> 40-7e.p, a1-fe.p
|
| +<icu:state> 40-7e.p, a1-fe.p, 66.i
|
| +<icu:state> 40-7e.p, a1-fe.p, 42.i, 44.i, 45.i, 4a-4b.i
|
| +<icu:state> 40-7e.p, a1-fe.p, 42.i, 63.i, 75.i
|
| +<icu:state> 40-7e.p, a1-fe.p, 54.i
|
| +<icu:state> 40-7e.p, a1-fe.p, 41.i
|
| +<icu:state> 40-7e.p, a1-fe.p, 61.i
|
| +<icu:state> 40-7e.p, a1-fe.p, 4e.i
|
| +<icu:state> 40-7e.p, a1-fe.p, 54.i, 57.i, 5a.i, 62.i, 72.i
|
|
|
| CHARMAP
|
| <U0000> \x00 |0
|
|
|