| OLD | NEW |
| 1 #!/bin/sh | 1 #!/bin/sh |
| 2 # Copyright 2015 The Chromium Authors. All rights reserved. | 2 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 # References: | 6 # References: |
| 7 # https://encoding.spec.whatwg.org/#big5 | 7 # https://encoding.spec.whatwg.org/#big5 |
| 8 | 8 |
| 9 # This script downloads the following file. | 9 # This script downloads the following file. |
| 10 # https://encoding.spec.whatwg.org/index-big5.txt | 10 # https://encoding.spec.whatwg.org/index-big5.txt |
| (...skipping 11 matching lines...) |
| 22 # *************************************************************************** | 22 # *************************************************************************** |
| 23 <code_set_name> "big5-html" | 23 <code_set_name> "big5-html" |
| 24 <char_name_mask> "AXXXX" | 24 <char_name_mask> "AXXXX" |
| 25 <mb_cur_max> 2 | 25 <mb_cur_max> 2 |
| 26 <mb_cur_min> 1 | 26 <mb_cur_min> 1 |
| 27 <uconv_class> "MBCS" | 27 <uconv_class> "MBCS" |
| 28 <subchar> \x3F | 28 <subchar> \x3F |
| 29 <icu:charsetFamily> "ASCII" | 29 <icu:charsetFamily> "ASCII" |
| 30 | 30 |
| 31 # 'p' is for the range that may produce non-BMP code points. | 31 # 'p' is for the range that may produce non-BMP code points. |
| 32 # 'i' is to make the code range illegal. |
| 33 # Big5 has a lot of small holes in the 2nd byte. If an unmapped 2nd byte is in |
| 34 # the ASCII range, it has to be added back to the stream to comply with the |
| 35 # encoding spec. Each state adds 1kB to the data size. |
| 32 # See http://userguide.icu-project.org/conversion/data. | 36 # See http://userguide.icu-project.org/conversion/data. |
| 33 <icu:state> 0-7f, 87-fe:1, 87-a0:2, c8:2, fa-fe:2 | 37 <icu:state> 0-7f, a1-fe:1, 87-a0:2, c8:2, fa-fe:2, 87:3, 89:4, 8a:5, 8b:6, 8d:7, 9b:8, 9f:9, a0:a |
| 34 <icu:state> 40-7e, a1-fe | 38 <icu:state> 40-7e, a1-fe |
| 35 <icu:state> 40-7e.p, a1-fe.p | 39 <icu:state> 40-7e.p, a1-fe.p |
| 40 <icu:state> 40-7e.p, a1-fe.p, 66.i |
| 41 <icu:state> 40-7e.p, a1-fe.p, 42.i, 44.i, 45.i, 4a-4b.i |
| 42 <icu:state> 40-7e.p, a1-fe.p, 42.i, 63.i, 75.i |
| 43 <icu:state> 40-7e.p, a1-fe.p, 54.i |
| 44 <icu:state> 40-7e.p, a1-fe.p, 41.i |
| 45 <icu:state> 40-7e.p, a1-fe.p, 61.i |
| 46 <icu:state> 40-7e.p, a1-fe.p, 4e.i |
| 47 <icu:state> 40-7e.p, a1-fe.p, 54.i, 57.i, 5a.i, 62.i, 72.i |
| 36 | 48 |
| 37 CHARMAP | 49 CHARMAP |
| 38 PREAMBLE | 50 PREAMBLE |
| 39 } | 51 } |
| 40 | 52 |
| 41 function ascii { | 53 function ascii { |
| 42 for i in $(seq 0 127) | 54 for i in $(seq 0 127) |
| 43 do | 55 do |
| 44 printf '<U%04X> \\x%02X |0\n' $i $i | 56 printf '<U%04X> \\x%02X |0\n' $i $i |
| 45 done | 57 done |
| (...skipping 34 matching lines...) |
| 80 function unsorted_table { | 92 function unsorted_table { |
| 81 two_char_seq | 93 two_char_seq |
| 82 big5 | 94 big5 |
| 83 } | 95 } |
| 84 | 96 |
| 85 wget -N -r -nd https://encoding.spec.whatwg.org/index-big5.txt | 97 wget -N -r -nd https://encoding.spec.whatwg.org/index-big5.txt |
| 86 preamble | 98 preamble |
| 87 ascii | 99 ascii |
| 88 unsorted_table | sort -k4 | uniq | cut -f 1-3 -d ' ' | 100 unsorted_table | sort -k4 | uniq | cut -f 1-3 -d ' ' |
| 89 echo 'END CHARMAP' | 101 echo 'END CHARMAP' |
| OLD | NEW |