| OLD | NEW |
| 1 #!/bin/sh | 1 #!/bin/sh |
| 2 # Copyright 2014 The Chromium Authors. All rights reserved. | 2 # Copyright 2014 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 # References: | 6 # References: |
| 7 # http://encoding.spec.whatwg.org/#euc-jp | 7 # https://encoding.spec.whatwg.org/#euc-jp |
| 8 # http://legacy-encoding.sourceforge.jp/wiki/index.php?cp51932 | 8 # https://legacy-encoding.sourceforge.jp/wiki/index.php?cp51932 |
| 9 # http://www.iana.org/assignments/charset-reg/CP51932 | 9 # https://www.iana.org/assignments/charset-reg/CP51932 |
| 10 # Table 3-64 in CJKV Information Processing 2/e. | 10 # Table 3-64 in CJKV Information Processing 2/e. |
| 11 | 11 |
| 12 # Download the following two files, run it in source/data/mappings directory | 12 # Download the following two files, run it in source/data/mappings directory |
| 13 # and save the result to euc-jp-html5.ucm | 13 # and save the result to euc-jp-html5.ucm |
| 14 # http://encoding.spec.whatwg.org/index-jis0208.txt | 14 # https://encoding.spec.whatwg.org/index-jis0208.txt |
| 15 # http://encoding.spec.whatwg.org/index-jis0212.txt | 15 # https://encoding.spec.whatwg.org/index-jis0212.txt |
| 16 | 16 |
| 17 function preamble { | 17 function preamble { |
| 18 cat <<PREAMBLE | 18 cat <<PREAMBLE |
| 19 # *************************************************************************** | 19 # *************************************************************************** |
| 20 # * | 20 # * |
| 21 # * Copyright (C) 1995-2014, International Business Machines | 21 # * Copyright (C) 1995-2014, International Business Machines |
| 22 # * Corporation and others. All Rights Reserved. | 22 # * Corporation and others. All Rights Reserved. |
| 23 # * | 23 # * |
| 24 # * Generated per the algorithm for EUC-JP | 24 # * Generated per the algorithm for EUC-JP |
| 25 # * described at http://encoding.spec.whatwg.org/#euc-jp. | 25 # * described at https://encoding.spec.whatwg.org/#euc-jp. |
| 26 # * | 26 # * |
| 27 # *************************************************************************** | 27 # *************************************************************************** |
| 28 <code_set_name> "euc-jp-html5" | 28 <code_set_name> "euc-jp-html" |
| 29 <char_name_mask> "AXXXX" | 29 <char_name_mask> "AXXXX" |
| 30 <mb_cur_max> 3 | 30 <mb_cur_max> 3 |
| 31 <mb_cur_min> 1 | 31 <mb_cur_min> 1 |
| 32 <uconv_class> "MBCS" | 32 <uconv_class> "MBCS" |
| 33 <subchar> \xF4\xFE | 33 <subchar> \x3F |
| 34 <subchar1> \x1A | |
| 35 <icu:charsetFamily> "ASCII" | 34 <icu:charsetFamily> "ASCII" |
| 36 | 35 |
| 37 <icu:state> 0-7f, 8e:2, 8f:3, a1-fe:1 | 36 <icu:state> 0-7f, 8e:2, 8f:3, a1-fe:1 |
| 38 <icu:state> a1-fe | 37 <icu:state> a1-fe |
| 39 <icu:state> a1-e2 | 38 <icu:state> a1-df |
| 40 <icu:state> a1-fe:1, a1:4, a3-a5:4, a8:4, ac-af:4, ee-f2:4, f4-fe:4 | 39 <icu:state> a1-fe:1, a1:4, a3-a5:4, a8:4, ac-af:4, ee-f2:4, f4-fe:4 |
| 41 <icu:state> a1-fe.u | 40 <icu:state> a1-fe.u |
| 42 | 41 |
| 43 CHARMAP | 42 CHARMAP |
| 44 PREAMBLE | 43 PREAMBLE |
| 45 } | 44 } |
| 46 | 45 |
| 47 #<U0000> \x00 |0 | 46 #<U0000> \x00 |0 |
| 48 function ascii { | 47 function ascii { |
| 49 for i in $(seq 0 127) | 48 for i in $(seq 0 127) |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 91 { printf ("<U%4s> \\x8F\\x%02X\\x%02X |3\n", substr($2, 3),\ | 90 { printf ("<U%4s> \\x8F\\x%02X\\x%02X |3\n", substr($2, 3),\ |
| 92 $1 / 94 + 0xA1, $1 % 94 + 0xA1);}' \ | 91 $1 / 94 + 0xA1, $1 % 94 + 0xA1);}' \ |
| 93 index-jis0212.txt | 92 index-jis0212.txt |
| 94 } | 93 } |
| 95 | 94 |
| 96 function unsorted_table { | 95 function unsorted_table { |
| 97 ascii | 96 ascii |
| 98 half_width_kana | 97 half_width_kana |
| 99 jis208 | 98 jis208 |
| 100 jis212 | 99 jis212 |
| 101 decode_only_extra | |
| 102 echo '<U00A5> \x5C |1' | 100 echo '<U00A5> \x5C |1' |
| 103 echo '<U203E> \x7E |1' | 101 echo '<U203E> \x7E |1' |
| 104 } | 102 } |
| 105 | 103 |
| 104 wget -N -r -nd https://encoding.spec.whatwg.org/index-jis0208.txt |
| 105 wget -N -r -nd https://encoding.spec.whatwg.org/index-jis0212.txt |
| 106 preamble | 106 preamble |
| 107 unsorted_table | sort | uniq | 107 unsorted_table | sort | uniq |
| 108 echo 'END CHARMAP' | 108 echo 'END CHARMAP' |
| OLD | NEW |