Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(30)

Side by Side Diff: scripts/eucjp_gen.sh

Issue 984233002: Update CJK converters and their generating scripts (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: add EUC-KR to README.chromium Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « scripts/big5_gen.sh ('k') | scripts/euckr_gen.sh » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/bin/sh 1 #!/bin/sh
2 # Copyright 2014 The Chromium Authors. All rights reserved. 2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 # References: 6 # References:
7 # http://encoding.spec.whatwg.org/#euc-jp 7 # https://encoding.spec.whatwg.org/#euc-jp
8 # http://legacy-encoding.sourceforge.jp/wiki/index.php?cp51932 8 # https://legacy-encoding.sourceforge.jp/wiki/index.php?cp51932
9 # http://www.iana.org/assignments/charset-reg/CP51932 9 # https://www.iana.org/assignments/charset-reg/CP51932
10 # Table 3-64 in CJKV Information Processing 2/e. 10 # Table 3-64 in CJKV Information Processing 2/e.
11 11
12 # Download the following two files, run it in source/data/mappings directory 12 # Download the following two files, run it in source/data/mappings directory
13 # and save the result to euc-jp-html5.ucm 13 # and save the result to euc-jp-html5.ucm
14 # http://encoding.spec.whatwg.org/index-jis0208.txt 14 # https://encoding.spec.whatwg.org/index-jis0208.txt
15 # http://encoding.spec.whatwg.org/index-jis0212.txt 15 # https://encoding.spec.whatwg.org/index-jis0212.txt
16 16
17 function preamble { 17 function preamble {
18 cat <<PREAMBLE 18 cat <<PREAMBLE
19 # *************************************************************************** 19 # ***************************************************************************
20 # * 20 # *
21 # * Copyright (C) 1995-2014, International Business Machines 21 # * Copyright (C) 1995-2014, International Business Machines
22 # * Corporation and others. All Rights Reserved. 22 # * Corporation and others. All Rights Reserved.
23 # * 23 # *
24 # * Generated per the algorithm for EUC-JP 24 # * Generated per the algorithm for EUC-JP
25 # * described at http://encoding.spec.whatwg.org/#euc-jp. 25 # * described at https://encoding.spec.whatwg.org/#euc-jp.
26 # * 26 # *
27 # *************************************************************************** 27 # ***************************************************************************
28 <code_set_name> "euc-jp-html5" 28 <code_set_name> "euc-jp-html"
29 <char_name_mask> "AXXXX" 29 <char_name_mask> "AXXXX"
30 <mb_cur_max> 3 30 <mb_cur_max> 3
31 <mb_cur_min> 1 31 <mb_cur_min> 1
32 <uconv_class> "MBCS" 32 <uconv_class> "MBCS"
33 <subchar> \xF4\xFE 33 <subchar> \x3F
34 <subchar1> \x1A
35 <icu:charsetFamily> "ASCII" 34 <icu:charsetFamily> "ASCII"
36 35
37 <icu:state> 0-7f, 8e:2, 8f:3, a1-fe:1 36 <icu:state> 0-7f, 8e:2, 8f:3, a1-fe:1
38 <icu:state> a1-fe 37 <icu:state> a1-fe
39 <icu:state> a1-e2 38 <icu:state> a1-df
40 <icu:state> a1-fe:1, a1:4, a3-a5:4, a8:4, ac-af:4, ee-f2:4, f4-fe:4 39 <icu:state> a1-fe:1, a1:4, a3-a5:4, a8:4, ac-af:4, ee-f2:4, f4-fe:4
41 <icu:state> a1-fe.u 40 <icu:state> a1-fe.u
42 41
43 CHARMAP 42 CHARMAP
44 PREAMBLE 43 PREAMBLE
45 } 44 }
46 45
47 #<U0000> \x00 |0 46 #<U0000> \x00 |0
48 function ascii { 47 function ascii {
49 for i in $(seq 0 127) 48 for i in $(seq 0 127)
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
91 { printf ("<U%4s> \\x8F\\x%02X\\x%02X |3\n", substr($2, 3),\ 90 { printf ("<U%4s> \\x8F\\x%02X\\x%02X |3\n", substr($2, 3),\
92 $1 / 94 + 0xA1, $1 % 94 + 0xA1);}' \ 91 $1 / 94 + 0xA1, $1 % 94 + 0xA1);}' \
93 index-jis0212.txt 92 index-jis0212.txt
94 } 93 }
95 94
96 function unsorted_table { 95 function unsorted_table {
97 ascii 96 ascii
98 half_width_kana 97 half_width_kana
99 jis208 98 jis208
100 jis212 99 jis212
101 decode_only_extra
102 echo '<U00A5> \x5C |1' 100 echo '<U00A5> \x5C |1'
103 echo '<U203E> \x7E |1' 101 echo '<U203E> \x7E |1'
104 } 102 }
105 103
104 wget -N -r -nd https://encoding.spec.whatwg.org/index-jis0208.txt
105 wget -N -r -nd https://encoding.spec.whatwg.org/index-jis0212.txt
106 preamble 106 preamble
107 unsorted_table | sort | uniq 107 unsorted_table | sort | uniq
108 echo 'END CHARMAP' 108 echo 'END CHARMAP'
OLDNEW
« no previous file with comments | « scripts/big5_gen.sh ('k') | scripts/euckr_gen.sh » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698