Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(82)

Side by Side Diff: scripts/big5_gen.sh

Issue 1514253003: Converter update for Big5, KOI8-U and timezone update to 2015g (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: readme update Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « android/icudtl.dat ('k') | source/data/in/icudtl.dat » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 #!/bin/sh 1 #!/bin/sh
2 # Copyright 2015 The Chromium Authors. All rights reserved. 2 # Copyright 2015 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 # References: 6 # References:
7 # https://encoding.spec.whatwg.org/#big5 7 # https://encoding.spec.whatwg.org/#big5
8 8
9 # This script downloads the following file. 9 # This script downloads the following file.
10 # https://encoding.spec.whatwg.org/index-big5.txt 10 # https://encoding.spec.whatwg.org/index-big5.txt
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
52 52
53 function ascii { 53 function ascii {
54 for i in $(seq 0 127) 54 for i in $(seq 0 127)
55 do 55 do
56 printf '<U%04X> \\x%02X |0\n' $i $i 56 printf '<U%04X> \\x%02X |0\n' $i $i
57 done 57 done
58 } 58 }
59 59
60 60
61 # HKSCS characters are not supported in encoding ( |lead < 0xA1| ) 61 # HKSCS characters are not supported in encoding ( |lead < 0xA1| )
62 # Entries with pointer=528[79] have to be decoding-only even though 62 # Entries with pointer=528[79] and 5247 ~ 5250 have to be decoding-only
63 # come before the other entry with the same Unicode character. 63 # even though they come before the other entry with the same Unicode
64 # character. The corresponding Unicode characters are U+255[0E],
65 # U+256[1A], and U+534[15].
64 # See https://www.w3.org/Bugs/Public/show_bug.cgi?id=27878 66 # See https://www.w3.org/Bugs/Public/show_bug.cgi?id=27878
65 function big5 { 67 function big5 {
66 awk '!/^#/ && !/^$/ \ 68 awk '!/^#/ && !/^$/ \
67 { pointer = $1; \ 69 { pointer = $1; \
68 ucs = substr($2, 3); \ 70 ucs = substr($2, 3); \
69 sortkey = (length(ucs) < 5) ? ("0" ucs) : ucs; 71 sortkey = (length(ucs) < 5) ? ("0" ucs) : ucs;
70 lead = pointer / 157 + 0x81; \ 72 lead = pointer / 157 + 0x81; \
71 is_decoding_only = lead < 0xA1 || seen_before[ucs] || \ 73 is_decoding_only = lead < 0xA1 || seen_before[ucs] || \
72 pointer == 5287 || pointer == 5289; \ 74 pointer == 5287 || pointer == 5289 || \
75 (5247 <= pointer && pointer <= 5250);
73 trail = $1 % 157; \ 76 trail = $1 % 157; \
74 trail_offset = trail < 0x3F ? 0x40 : 0x62; \ 77 trail_offset = trail < 0x3F ? 0x40 : 0x62; \
75 tag = (is_decoding_only ? 3 : 0); \ 78 tag = (is_decoding_only ? 3 : 0); \
76 printf ("<U%4s> \\x%02X\\x%02X |%d %s\n", ucs,\ 79 printf ("<U%4s> \\x%02X\\x%02X |%d %s\n", ucs,\
77 lead, trail + trail_offset, tag, sortkey);\ 80 lead, trail + trail_offset, tag, sortkey);\
78 seen_before[ucs] = is_decoding_only ? 0 : 1; \ 81 seen_before[ucs] = is_decoding_only ? 0 : 1; \
79 }' \ 82 }' \
80 index-big5.txt 83 index-big5.txt
81 } 84 }
82 85
83 function two_char_seq { 86 function two_char_seq {
84 cat <<EOF 87 cat <<EOF
85 <U00CA><U0304> \x88\x62 |3 000CA 88 <U00CA><U0304> \x88\x62 |3 000CA
86 <U00CA><U030C> \x88\x64 |3 000CA 89 <U00CA><U030C> \x88\x64 |3 000CA
87 <U00EA><U0304> \x88\xA3 |3 000EA 90 <U00EA><U0304> \x88\xA3 |3 000EA
88 <U00EA><U030C> \x88\xA5 |3 000EA 91 <U00EA><U030C> \x88\xA5 |3 000EA
89 EOF 92 EOF
90 } 93 }
91 94
92 function unsorted_table { 95 function unsorted_table {
93 two_char_seq 96 two_char_seq
94 big5 97 big5
95 } 98 }
96 99
97 wget -N -r -nd https://encoding.spec.whatwg.org/index-big5.txt 100 wget -N -r -nd https://encoding.spec.whatwg.org/index-big5.txt
98 preamble 101 preamble
99 ascii 102 ascii
100 unsorted_table | sort -k4 | uniq | cut -f 1-3 -d ' ' 103 unsorted_table | sort -k4 | uniq | cut -f 1-3 -d ' '
101 echo 'END CHARMAP' 104 echo 'END CHARMAP'
OLD | NEW
« no previous file with comments | « android/icudtl.dat ('k') | source/data/in/icudtl.dat » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698