OLD | NEW |
1 #!/bin/sh | 1 #!/bin/sh |
2 # Copyright 2015 The Chromium Authors. All rights reserved. | 2 # Copyright 2015 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 # References: | 6 # References: |
7 # https://encoding.spec.whatwg.org/#euc-kr | 7 # https://encoding.spec.whatwg.org/#euc-kr |
8 | 8 |
9 # This script downloads the following file. | 9 # This script downloads the following file. |
10 # https://encoding.spec.whatwg.org/index-euc-kr.txt | 10 # https://encoding.spec.whatwg.org/index-euc-kr.txt |
11 | 11 |
12 function preamble { | 12 function preamble { |
13 cat <<PREAMBLE | 13 cat <<PREAMBLE |
14 # *************************************************************************** | 14 # *************************************************************************** |
15 # * | 15 # * |
16 # * Copyright (C) 1995-2015, International Business Machines | 16 # * Copyright (C) 1995-2015, International Business Machines |
17 # * Corporation and others. All Rights Reserved. | 17 # * Corporation and others. All Rights Reserved. |
18 # * | 18 # * |
19 # * Generated per the algorithm for EUC-KR | 19 # * Generated per the algorithm for EUC-KR |
20 # * described at http://encoding.spec.whatwg.org/#euc-kr | 20 # * described at http://encoding.spec.whatwg.org/#euc-kr |
21 # * | 21 # * |
22 # *************************************************************************** | 22 # *************************************************************************** |
23 <code_set_name> "euc-kr-html" | 23 <code_set_name> "euc-kr-html" |
24 <mb_cur_max> 2 | 24 <mb_cur_max> 2 |
25 <mb_cur_min> 1 | 25 <mb_cur_min> 1 |
26 <uconv_class> "MBCS" | 26 <uconv_class> "MBCS" |
27 <subchar> \x3F | 27 <subchar> \x3F |
28 <icu:charsetFamily> "ASCII" | 28 <icu:charsetFamily> "ASCII" |
29 | 29 |
30 <icu:state> 0-80, 81-fe:1, ff | 30 # 81-fe in states 2 and 3 could be tightened to a1-fe, but |
| 31 # to be compliant with the HTML5 spec, it should be 81-fe. |
| 32 <icu:state> 0-7f, 81-c5:1, c6:2, c7-fe:3 |
31 <icu:state> 41-5a, 61-7a, 81-fe | 33 <icu:state> 41-5a, 61-7a, 81-fe |
| 34 <icu:state> 41-52, 81-fe |
| 35 <icu:state> 81-fe |
32 | 36 |
33 CHARMAP | 37 CHARMAP |
34 PREAMBLE | 38 PREAMBLE |
35 } | 39 } |
36 | 40 |
37 function ascii { | 41 function ascii { |
38 for i in $(seq 0 127) | 42 for i in $(seq 0 127) |
39 do | 43 do |
40 printf '<U%04X> \\x%02X |0\n' $i $i | 44 printf '<U%04X> \\x%02X |0\n' $i $i |
41 done | 45 done |
(...skipping 11 matching lines...) Expand all Loading... |
53 printf ("<U%4s> \\x%02X\\x%02X |%d\n", ucs,\ | 57 printf ("<U%4s> \\x%02X\\x%02X |%d\n", ucs,\ |
54 lead, trail, tag);\ | 58 lead, trail, tag);\ |
55 }' \ | 59 }' \ |
56 index-euc-kr.txt | 60 index-euc-kr.txt |
57 } | 61 } |
58 | 62 |
59 function unsorted_table { | 63 function unsorted_table { |
60 euckr | 64 euckr |
61 } | 65 } |
62 | 66 |
63 curl -o index-euc-kr.txt https://encoding.spec.whatwg.org/index-euc-kr.txt | 67 wget -N -r -nd https://encoding.spec.whatwg.org/index-euc-kr.txt |
64 preamble | 68 preamble |
65 ascii | 69 ascii |
66 unsorted_table | sort -k1 | uniq | 70 unsorted_table | sort -k1 | uniq |
67 echo 'END CHARMAP' | 71 echo 'END CHARMAP' |
OLD | NEW |