| Index: scripts/big5_gen.sh
|
| diff --git a/scripts/big5_gen.sh b/scripts/big5_gen.sh
|
| index 58bb680f5b6c817c8baa2f2bbf534c19cb885caf..7b57d525ffc43dd8db46f94ba14f33a0d9246a6e 100644
|
| --- a/scripts/big5_gen.sh
|
| +++ b/scripts/big5_gen.sh
|
| @@ -29,10 +29,22 @@ cat <<PREAMBLE
|
| <icu:charsetFamily> "ASCII"
|
|
|
| # 'p' is for the range that may produce non-BMP code points.
|
| +# 'i' marks the code range as illegal.
|
| +# Big5 has a lot of small holes in the 2nd byte. If such a byte is in the ASCII range,
|
| +# the 2nd byte has to be added back to the stream to be compliant with the
|
| +# encoding spec. Each state adds 1kB to the data size.
|
| # See http://userguide.icu-project.org/conversion/data.
|
| -<icu:state> 0-7f, 87-fe:1, 87-a0:2, c8:2, fa-fe:2
|
| +<icu:state> 0-7f, a1-fe:1, 87-a0:2, c8:2, fa-fe:2, 87:3, 89:4, 8a:5, 8b:6, 8d:7, 9b:8, 9f:9, a0:a
|
| <icu:state> 40-7e, a1-fe
|
| <icu:state> 40-7e.p, a1-fe.p
|
| +<icu:state> 40-7e.p, a1-fe.p, 66.i
|
| +<icu:state> 40-7e.p, a1-fe.p, 42.i, 44.i, 45.i, 4a-4b.i
|
| +<icu:state> 40-7e.p, a1-fe.p, 42.i, 63.i, 75.i
|
| +<icu:state> 40-7e.p, a1-fe.p, 54.i
|
| +<icu:state> 40-7e.p, a1-fe.p, 41.i
|
| +<icu:state> 40-7e.p, a1-fe.p, 61.i
|
| +<icu:state> 40-7e.p, a1-fe.p, 4e.i
|
| +<icu:state> 40-7e.p, a1-fe.p, 54.i, 57.i, 5a.i, 62.i, 72.i
|
|
|
| CHARMAP
|
| PREAMBLE
|
|
|