| Index: scripts/single_byte_gen.sh
|
| ===================================================================
|
| --- scripts/single_byte_gen.sh (revision 0)
|
| +++ scripts/single_byte_gen.sh (working copy)
|
| @@ -0,0 +1,62 @@
|
| +#!/bin/bash
|
| +# Copyright (c) 2014 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +function preamble {
|
| +
|
| +encoding="$1"
|
| +cat <<PREAMBLE
|
| +# ***************************************************************************
|
| +# *
|
| +# * Generated from index-$encoding.txt (
|
| +# * https://encoding.spec.whatwg.org/index-${encoding}.txt )
|
| +# * following the algorithm for the single byte legacy encoding
|
| +# * described at http://encoding.spec.whatwg.org/#single-byte-decoder
|
| +# *
|
| +# ***************************************************************************
|
| +<code_set_name> "${encoding}-html"
|
| +<char_name_mask> "AXXXX"
|
| +<mb_cur_max> 1
|
| +<mb_cur_min> 1
|
| +<uconv_class> "SBCS"
|
| +<subchar> \x3F
|
| +<icu:charsetFamily> "ASCII"
|
| +
|
| +CHARMAP
|
| +PREAMBLE
|
| +
|
| +}
|
| +
|
| +# The list of html5 encodings. Note that iso-8859-8-i is not listed here
|
| +# because its mapping table is exactly the same as iso-8859-8. The difference
|
| +# is BiDi handling (logical vs visual).
|
| +encodings="ibm866 iso-8859-2 iso-8859-3 iso-8859-4 iso-8859-5 iso-8859-6\
|
| + iso-8859-7 iso-8859-8 iso-8859-10 iso-8859-13 iso-8859-14\
|
| + iso-8859-15 iso-8859-16 koi8-r koi8-u macintosh\
|
| + windows-874 windows-1250 windows-1251 windows-1252 windows-1253\
|
| + windows-1254 windows-1255 windows-1256 windows-1257 windows-1258\
|
| + x-mac-cyrillic"
|
| +
|
| +ENCODING_DIR="$(dirname $0)/../source/data/mappings"
|
| +for e in ${encodings}
|
| +do
|
| + output="${ENCODING_DIR}/${e}-html.ucm"
|
| + index="index-${e}.txt"
|
| + indexurl="https://encoding.spec.whatwg.org/index-${e}.txt"
|
| + curl -o ${index} "${indexurl}"
|
| + preamble ${e} > ${output}
|
| + awk 'BEGIN \
|
| + { \
|
| + for (i=0; i < 0x80; ++i) \
|
| + { \
|
| + printf("<U%04X> \\x%02X |0\n", i, i);} \
|
| + } \
|
| + !/^#/ && !/^$/ \
|
| + {
|
| + printf ("<U%4s> \\x%02X |0\n", substr($2, 3), $1 + 0x80); \
|
| + }' ${index} | sort >> ${output}
|
| + echo 'END CHARMAP' >> ${output}
|
| + rm ${index}
|
| +done
|
| +
|
|
|
| Property changes on: scripts/single_byte_gen.sh
|
| ___________________________________________________________________
|
| Added: svn:executable
|
| ## -0,0 +1 ##
|
| +*
|
| \ No newline at end of property
|
| Added: svn:eol-style
|
| ## -0,0 +1 ##
|
| +LF
|
| \ No newline at end of property
|
|
|