Index: icu52/scripts/eucjp_gen.sh |
=================================================================== |
--- icu52/scripts/eucjp_gen.sh (revision 266668) |
+++ icu52/scripts/eucjp_gen.sh (working copy) |
@@ -9,7 +9,7 @@ |
# http://www.iana.org/assignments/charset-reg/CP51932 |
# Table 3-64 in CJKV Information Processing 2/e. |
-# Download the following two files, run it in source/data/mappings directory |
+# Download the following two files, run it in source/data/mappings directory |
# and save the result to euc-jp-html5.ucm |
# http://encoding.spec.whatwg.org/index-jis0208.txt |
# http://encoding.spec.whatwg.org/index-jis0212.txt |
@@ -23,8 +23,6 @@ |
# * |
# * Generated per the algorithm for EUC-JP |
# * described at http://encoding.spec.whatwg.org/#euc-jp. |
-# * Added the 34 decoding only (EUC-JP to Unicode) entries from euc-jp-2007.ucm |
-# * for the backward compatibility. |
# * |
# *************************************************************************** |
<code_set_name> "euc-jp-html5" |
@@ -55,11 +53,12 @@ |
} |
-function fullwidth_ascii { |
+# Map the two-byte sequences 0x8E 0x[A1-DF] to U+FF61 through U+FF9F (half-width kana) |
+function half_width_kana { |
for i in $(seq 0xA1 0xDF) |
do |
# 65377 = 0xFF61, 161 = 0xA1 |
- printf '<U%04X> \\x%02X |0\n' $(($i + 65377 - 161)) $i |
+ printf '<U%04X> \\x8E\\x%02X |0\n' $(($i + 65377 - 161)) $i |
done |
} |
@@ -94,34 +93,9 @@ |
index-jis0212.txt |
} |
-# Add the uni-directional mapping entries (EUC-JP to Unicode) that |
-# are only present in euc-jp-2007.ucm. There are 34 of them. They're added |
-# for the backward compatibility with the old behavior of Chrome. |
-# See https://www.w3.org/Bugs/Public/show_bug.cgi?id=25266 |
-# Here are the break-downs: |
-# 1. 0x8E0xE0 to 0x8E0xE2 |
-# 00A2 00A3 00AC |
-# 2. JIS X 0212 extra (0x8F 0xF3 0xhh) |
-# 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 |
-# 2172 2173 2174 2175 2176 2177 2178 2179 221A 2220 2229 222A 222B 2235 2252 |
-# 2261 22A5 3231 |
-# 3. JIS X 0208 extra : 0xFC 0xFB => FFE2 |
- |
-function decode_only_extra { |
- decode_only_list=$( |
- for i in $(grep '|3' euc-jp-2007.ucm | sed 's/^<U\(....\)>.*$/\1/') |
- do |
- grep 0x${i} index-jis0212.txt > /dev/null || echo $i |
- done) |
- |
- for u in $decode_only_list |
- do |
- grep $u euc-jp-2007.ucm | grep '|3' |
- done |
-} |
- |
function unsorted_table { |
ascii |
+ half_width_kana |
jis208 |
jis212 |
decode_only_extra |