Index: scripts/euckr_gen.sh |
diff --git a/scripts/euckr_gen.sh b/scripts/euckr_gen.sh |
new file mode 100644 |
index 0000000000000000000000000000000000000000..497be5583cd8834db7d732e2ddc13f861e3ef12d |
--- /dev/null |
+++ b/scripts/euckr_gen.sh |
@@ -0,0 +1,67 @@ |
+#!/bin/sh |
+# Copyright 2015 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+# References: |
+# https://encoding.spec.whatwg.org/#euc-kr |
+ |
+# This script downloads the following file. |
+# https://encoding.spec.whatwg.org/index-euc-kr.txt |
+ |
+function preamble { |
+cat <<PREAMBLE |
+# *************************************************************************** |
+# * |
+# * Copyright (C) 1995-2015, International Business Machines |
+# * Corporation and others. All Rights Reserved. |
+# * |
+# * Generated per the algorithm for EUC-KR |
+# * described at http://encoding.spec.whatwg.org/#euc-kr |
+# * |
+# *************************************************************************** |
+<code_set_name> "euc-kr-html" |
+<mb_cur_max> 2 |
+<mb_cur_min> 1 |
+<uconv_class> "MBCS" |
+<subchar> \x3F |
+<icu:charsetFamily> "ASCII" |
+ |
+<icu:state> 0-80, 81-fe:1, ff |
+<icu:state> 41-5a, 61-7a, 81-fe |
+ |
+CHARMAP |
+PREAMBLE |
+} |
+ |
+function ascii { |
+ for i in $(seq 0 127) |
+ do |
+ printf '<U%04X> \\x%02X |0\n' $i $i |
+ done |
+} |
+ |
+ |
+# Converts the downloaded index file into CHARMAP rows on stdout. |
+# Arguments: $1 - index file to read (optional, default index-euc-kr.txt) |
+# Lines starting with '#' and empty lines are skipped. For every other line, |
+# field 1 is the pointer and field 2 is the code point written as 0xXXXX. |
+# The pointer is split into two bytes: |
+#   lead  = pointer / 190 + 0x81   (integer division) |
+#   trail = pointer % 190 + 0x41 |
+# and the row is printed as "<UXXXX> \xLL\xTT |0". |
+# NOTE(review): the comment previously here mentioned HKSCS and a |
+# |lead < 0xA1| restriction. Nothing below filters on the lead byte, so that |
+# remark looks stale - confirm before relying on it. |
+euckr() { |
+  awk ' |
+    # Skip comment lines and blank lines. |
+    /^#/ || /^$/ { next } |
+    { |
+      pointer = $1 |
+      ucs = substr($2, 3)               # drop the leading "0x" |
+      # Decimal constants: hex literals in awk program text are not portable. |
+      lead = int(pointer / 190) + 129   # 0x81 |
+      trail = pointer % 190 + 65        # 0x41 |
+      printf("<U%4s> \\x%02X\\x%02X |%d\n", ucs, lead, trail, 0) |
+    } |
+  ' "${1:-index-euc-kr.txt}" |
+} |
+ |
+function unsorted_table { |
+ euckr |
+} |
+ |
+curl -o index-euc-kr.txt https://encoding.spec.whatwg.org/index-euc-kr.txt |
+preamble |
+ascii |
+unsorted_table | sort -k1 | uniq |
+echo 'END CHARMAP' |