OLD | NEW |
(Empty) | |
| 1 #!/bin/sh |
| 2 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 # References: |
| 7 # https://encoding.spec.whatwg.org/#euc-kr |
| 8 |
| 9 # This script downloads the following file. |
| 10 # https://encoding.spec.whatwg.org/index-euc-kr.txt |
| 11 |
# Print the fixed header of the generated charmap: the ICU copyright banner,
# the converter attributes (<code_set_name>, <mb_cur_max>, <subchar>, ...),
# the two <icu:state> rows and the opening CHARMAP line.
# Takes no arguments; writes to stdout.
#
# Defined with the POSIX "name()" form rather than the bash-only "function"
# keyword, because the script's shebang is #!/bin/sh.
preamble() {
  # The delimiter is quoted so the header text is emitted literally
  # (no parameter expansion or backslash processing, e.g. for \x3F).
  cat <<'PREAMBLE'
# ***************************************************************************
# *
# * Copyright (C) 1995-2015, International Business Machines
# * Corporation and others. All Rights Reserved.
# *
# * Generated per the algorithm for EUC-KR
# * described at http://encoding.spec.whatwg.org/#euc-kr
# *
# ***************************************************************************
<code_set_name> "euc-kr-html"
<mb_cur_max> 2
<mb_cur_min> 1
<uconv_class> "MBCS"
<subchar> \x3F
<icu:charsetFamily> "ASCII"

<icu:state> 0-80, 81-fe:1, ff
<icu:state> 41-5a, 61-7a, 81-fe

CHARMAP
PREAMBLE
}
| 36 |
# Print the single-byte part of the charmap: one roundtrip ("|0") mapping
# per code point U+0000..U+007F, each mapped to the byte of the same value.
# Output lines look like:  <U0041> \x41 |0
# Takes no arguments; writes 128 lines to stdout.
#
# Uses the POSIX "name()" definition form and a plain counting loop so the
# function works under #!/bin/sh without the non-POSIX "seq" utility.
# The counter "i" is a global shell variable, as it was with the for loop.
ascii() {
  i=0
  while [ "$i" -le 127 ]
  do
    # "\\x" in the printf format yields a literal backslash followed by x.
    printf '<U%04X> \\x%02X |0\n' "$i" "$i"
    i=$((i + 1))
  done
}
| 43 |
| 44 |
# Convert index-euc-kr.txt (read from the current directory) into charmap
# lines for the double-byte part of EUC-KR.
#
# Every line that is neither empty nor a "#" comment is expected to hold a
# decimal pointer in field 1 and a code point written as 0xXXXX in field 2.
# The pointer is split into a byte pair:
#   lead  = floor(pointer / 190) + 0x81
#   trail = pointer % 190 + 0x41
# and emitted as a roundtrip mapping, e.g.  <UAC02> \x81\x41 |0
#
# Takes no arguments; writes to stdout.
# Defined with the POSIX "name()" form because the shebang is #!/bin/sh.
euckr() {
  awk '!/^#/ && !/^$/ {
    pointer = $1
    # Drop the leading "0x" from the code point.
    ucs = substr($2, 3)
    # Decimal constants are used because hexadecimal literals in awk
    # program text are not portable: 129 is 0x81 and 65 is 0x41.
    # int() makes the floor of the quotient explicit instead of relying
    # on the %X conversion to truncate a fractional value.
    lead = int(pointer / 190) + 129
    trail = pointer % 190 + 65
    # Tag 0 marks a roundtrip mapping.
    tag = 0
    printf("<U%4s> \\x%02X\\x%02X |%d\n", ucs, lead, trail, tag)
  }' index-euc-kr.txt
}
| 58 |
# Print every multi-byte mapping line of the charmap, in the order the
# generator produces them; the caller is responsible for sorting and
# de-duplicating. Currently this is just the output of euckr.
# Takes no arguments; writes to stdout.
#
# Defined with the POSIX "name()" form because the shebang is #!/bin/sh.
unsorted_table() {
  euckr
}
| 62 |
# Download the index, then write the complete charmap to stdout:
# header, single-byte mappings, sorted and de-duplicated double-byte
# mappings, and the closing END CHARMAP line.
#
# -f makes curl return a non-zero status on HTTP errors instead of saving
# the error page as the index; the script stops rather than generating a
# table from a missing or bogus index file.
curl -f -o index-euc-kr.txt https://encoding.spec.whatwg.org/index-euc-kr.txt || {
  echo 'error: failed to download index-euc-kr.txt' >&2
  exit 1
}
preamble
ascii
unsorted_table | sort -k1 | uniq
echo 'END CHARMAP'
OLD | NEW |