Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(154)

Unified Diff: scripts/euckr_gen.sh

Issue 839713003: ICU update to 54 step 3 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: fix big5 mapping Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « scripts/eucjp_gen.sh ('k') | scripts/ibm866_gen.sh » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: scripts/euckr_gen.sh
diff --git a/scripts/euckr_gen.sh b/scripts/euckr_gen.sh
new file mode 100644
index 0000000000000000000000000000000000000000..497be5583cd8834db7d732e2ddc13f861e3ef12d
--- /dev/null
+++ b/scripts/euckr_gen.sh
@@ -0,0 +1,67 @@
+#!/bin/sh
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# References:
+# https://encoding.spec.whatwg.org/#euc-kr
+
+# This script downloads the following file.
+# https://encoding.spec.whatwg.org/index-euc-kr.txt
+
+function preamble {
+cat <<PREAMBLE
+# ***************************************************************************
+# *
+# * Copyright (C) 1995-2015, International Business Machines
+# * Corporation and others. All Rights Reserved.
+# *
+# * Generated per the algorithm for EUC-KR
+# * described at http://encoding.spec.whatwg.org/#euc-kr
+# *
+# ***************************************************************************
+<code_set_name> "euc-kr-html"
+<mb_cur_max> 2
+<mb_cur_min> 1
+<uconv_class> "MBCS"
+<subchar> \x3F
+<icu:charsetFamily> "ASCII"
+
+<icu:state> 0-80, 81-fe:1, ff
+<icu:state> 41-5a, 61-7a, 81-fe
+
+CHARMAP
+PREAMBLE
+}
+
+function ascii {
+ for i in $(seq 0 127)
+ do
+ printf '<U%04X> \\x%02X |0\n' $i $i
+ done
+}
+
+
+# Convert the WHATWG EUC-KR index into two-byte charmap lines.
+#
+# Comment lines and empty lines of the index are skipped. On every other line
+# field 1 is the pointer and field 2 is the code point with a two-character
+# prefix that is stripped. Per https://encoding.spec.whatwg.org/#euc-kr the
+# pointer maps to the byte pair
+#   lead  = floor(pointer / 190) + 0x81
+#   trail = pointer % 190 + 0x41
+# Arguments: $1 - index file to read (default: index-euc-kr.txt)
+# Outputs:   lines of the form "<Uxxxx> \xLL\xTT |0" on stdout
+euckr() {
+  # awk division is floating point, so the quotient is truncated explicitly
+  # with int() rather than relying on printf to do it. The offsets are written
+  # in decimal (129 = 0x81, 65 = 0x41) because hexadecimal literals in awk
+  # program text are a gawk extension.
+  awk '!/^#/ && !/^$/ {
+    pointer = $1
+    ucs = substr($2, 3)   # drop the two-character "0x" prefix
+    lead = int(pointer / 190) + 129
+    trail = pointer % 190 + 65
+    tag = 0
+    printf("<U%4s> \\x%02X\\x%02X |%d\n", ucs, lead, trail, tag)
+  }' "${1:-index-euc-kr.txt}"
+}
+
+function unsorted_table {
+ euckr
+}
+
+curl -o index-euc-kr.txt https://encoding.spec.whatwg.org/index-euc-kr.txt
+preamble
+ascii
+unsorted_table | sort -k1 | uniq
+echo 'END CHARMAP'
« no previous file with comments | « scripts/eucjp_gen.sh ('k') | scripts/ibm866_gen.sh » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698