OLD | NEW |
(Empty) | |
| 1 #!/bin/bash |
| 2 # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
#######################################
# Prints the fixed header of a single-byte .ucm mapping file, ending with the
# line that opens the CHARMAP section.
# Arguments:
#   $1 - encoding name; it is substituted into the "Generated from" comment
#        and, with "-html" appended, into <code_set_name>.
# Outputs:
#   Writes the header to stdout.
#######################################
preamble() {
  # Keep the name local so a call does not overwrite a caller's variable.
  local encoding="$1"

  # Unquoted delimiter: ${encoding} is expanded in the text below, while
  # \x3F is emitted literally (a backslash before "x" is not special here).
  cat <<PREAMBLE
# ***************************************************************************
# *
# * Generated from index-$encoding.txt (
# * https://encoding.spec.whatwg.org/index-${encoding}.txt )
# * following the algorithm for the single byte legacy encoding
# * described at http://encoding.spec.whatwg.org/#single-byte-decoder
# *
# ***************************************************************************
<code_set_name> "${encoding}-html"
<char_name_mask> "AXXXX"
<mb_cur_max> 1
<mb_cur_min> 1
<uconv_class> "SBCS"
<subchar> \x3F
<icu:charsetFamily> "ASCII"

CHARMAP
PREAMBLE
}
| 30 |
# The list of html5 encodings. Note that iso-8859-8-i is not listed here
# because its mapping table is exactly the same as iso-8859-8. The difference
# is BiDi handling (logical vs visual).
# Stored as an array so that adjacent names stay separate regardless of how
# the continuation lines are indented.
encodings=(
  ibm866 iso-8859-2 iso-8859-3 iso-8859-4 iso-8859-5 iso-8859-6
  iso-8859-7 iso-8859-8 iso-8859-10 iso-8859-13 iso-8859-14
  iso-8859-15 iso-8859-16 koi8-r koi8-u macintosh
  windows-874 windows-1250 windows-1251 windows-1252 windows-1253
  windows-1254 windows-1255 windows-1256 windows-1257 windows-1258
  x-mac-cyrillic
)

# Directory that receives the generated <encoding>-html.ucm files, resolved
# relative to the location of this script.
ENCODING_DIR="$(dirname -- "$0")/../source/data/mappings"

for e in "${encodings[@]}"; do
  output="${ENCODING_DIR}/${e}-html.ucm"
  index="index-${e}.txt"
  indexurl="https://encoding.spec.whatwg.org/index-${e}.txt"

  # --fail makes curl return non-zero on an HTTP error instead of saving the
  # error page as if it were the index. Stop before writing a bogus table.
  if ! curl --fail -o "${index}" "${indexurl}"; then
    echo "Failed to download ${indexurl}" >&2
    rm -f -- "${index}"
    exit 1
  fi

  preamble "${e}" > "${output}"

  # BEGIN emits the identity mapping for bytes 0x00-0x7F. Each index line that
  # is neither a comment nor empty then maps byte (field 1 + 128) to the value
  # of field 2 with its first two characters removed (presumably the "0x"
  # prefix of the code point -- confirm against the index file format).
  # 128 is written in decimal because hexadecimal literals in awk program
  # text are a non-portable extension.
  # LC_ALL=C keeps the sort order independent of the user locale.
  awk '
    BEGIN {
      for (i = 0; i < 128; ++i)
        printf("<U%04X> \\x%02X |0\n", i, i);
    }
    !/^#/ && !/^$/ {
      printf("<U%4s> \\x%02X |0\n", substr($2, 3), $1 + 128);
    }
  ' "${index}" | LC_ALL=C sort >> "${output}"

  echo 'END CHARMAP' >> "${output}"
  rm -- "${index}"
done
| 62 |
OLD | NEW |