Index: scripts/sjis_gen.sh |
diff --git a/scripts/sjis_gen.sh b/scripts/sjis_gen.sh |
index adadb790184931e6f35858581860589b5eb9efc1..b98bf8065835a7dc8d9d71774fe7a70c479c5673 100755 |
--- a/scripts/sjis_gen.sh |
+++ b/scripts/sjis_gen.sh |
@@ -4,11 +4,11 @@ |
# found in the LICENSE file. |
# References: |
-# http://encoding.spec.whatwg.org/#shift_jis |
+# https://encoding.spec.whatwg.org/#shift_jis |
# Download the following file, run it in source/data/mappings directory |
# and save the result to euc-jp-html5.ucm |
-# http://encoding.spec.whatwg.org/index-jis0208.txt |
+# https://encoding.spec.whatwg.org/index-jis0208.txt |
function preamble { |
cat <<PREAMBLE |
@@ -18,7 +18,7 @@ cat <<PREAMBLE |
# * Corporation and others. All Rights Reserved. |
# * |
# * Generated per the algorithm for Shift_JIS |
-# * described at http://encoding.spec.whatwg.org/#shift_jis |
+# * described at https://encoding.spec.whatwg.org/#shift_jis |
# * |
# *************************************************************************** |
<code_set_name> "shift_jis-html5" |
@@ -26,12 +26,19 @@ cat <<PREAMBLE |
<mb_cur_max> 2 |
<mb_cur_min> 1 |
<uconv_class> "MBCS" |
-<subchar> \xFC\xFC |
-<subchar1> \x7F |
+<subchar> \x3F |
<icu:charsetFamily> "ASCII" |
-<icu:state> 0-80, 81-9f:1, a1-df, e0-fc:1 |
+<icu:state> 0-80, 81-9f:1, a1-df, e0-fc:1, 82:3, 84:4, 85-86:2, 87:5, 88:2, 98:6, eb-ec:2, ef:2, f9:2, fc:7 |
+ |
<icu:state> 40-7e, 80-fc |
+<icu:state> 80-fc |
+<icu:state> 4f-7e, 80-fc, 59-5f.i, 7a-7e.i |
+<icu:state> 40-7e, 80-fc, 61-6f.i |
+<icu:state> 40-7e, 80-fc, 76-7d.i |
+<icu:state> 40-7e, 80-fc, 73-7e.i |
+<icu:state> 40-4b, 80-fc |
+ |
CHARMAP |
PREAMBLE |
@@ -57,7 +64,7 @@ function half_width_kana { |
} |
-# From http://encoding.spec.whatwg.org/#index-shift_jis-pointer |
+# From https://encoding.spec.whatwg.org/#index-shift_jis-pointer |
# The index shift_jis pointer for code point is the return value of |
# these steps for the round-trip code points (tag = 0) |
# |
@@ -86,7 +93,7 @@ function jis208 { |
# EUDC (End User Defined Characters) is for decoding only |
# (use '|3' to denote that). |
-# See http://encoding.spec.whatwg.org/#shift_jis-decoder - step 5 |
+# See https://encoding.spec.whatwg.org/#shift_jis-decoder - step 5 |
# This function is called twice with {0x40, 0x7E, 0x40} and {0x80, 0xFC, 0x41} |
# to implement it. |
@@ -117,6 +124,7 @@ function unsorted_table { |
echo '<U203E> \x7E |1' |
} |
+wget -N -r -nd https://encoding.spec.whatwg.org/index-jis0208.txt |
preamble |
unsorted_table | sort | uniq |
echo 'END CHARMAP' |