Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(190)

Side by Side Diff: scripts/trim_data.sh

Issue 1624643003: ICU 56 step 2: Make the tree ready for local modifications (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561upstream
Patch Set: review comments addressed Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « android/patch_locale.sh ('k') | source/data/Makefile.in » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/bin/bash 1 #!/bin/bash
2 # Copyright (c) 2014 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 6
7 # Remove display names for languages that are not listed in the accept-language 7 # Remove display names for languages that are not listed in the accept-language
8 # list of Chromium. 8 # list of Chromium.
9 function filter_display_language_names { 9 function filter_display_language_names {
10 for lang in $(grep -v '^#' accept_lang.list) 10 for lang in $(grep -v '^#' "${scriptdir}/accept_lang.list")
11 do 11 do
12 # Set $OP to '|' only if $ACCEPT_LANG_PATTERN is not empty. 12 # Set $OP to '|' only if $ACCEPT_LANG_PATTERN is not empty.
13 OP=${ACCEPT_LANG_PATTERN:+|} 13 OP=${ACCEPT_LANG_PATTERN:+|}
14 ACCEPT_LANG_PATTERN="${ACCEPT_LANG_PATTERN}${OP}${lang}" 14 ACCEPT_LANG_PATTERN="${ACCEPT_LANG_PATTERN}${OP}${lang}"
15 done 15 done
16 ACCEPT_LANG_PATTERN="(${ACCEPT_LANG_PATTERN})[^a-z]" 16 ACCEPT_LANG_PATTERN="(${ACCEPT_LANG_PATTERN})[^a-z]"
17 17
18 echo "Filtering out display names for non-A-L languages ${langdatapath}" 18 echo "Filtering out display names for non-A-L languages ${langdatapath}"
19 for lang in $(grep -v '^#' chrome_ui_languages.list) 19 for lang in $(grep -v '^#' "${scriptdir}/chrome_ui_languages.list")
20 do 20 do
21 target=${langdatapath}/${lang}.txt 21 target=${langdatapath}/${lang}.txt
22 echo Overwriting ${target} ... 22 echo Overwriting ${target} ...
23 sed -r -i \ 23 sed -r -i \
24 '/^ Keys\{$/,/^ \}$/d 24 '/^ Keys\{$/,/^ \}$/d
25 /^ Languages\{$/, /^ \}$/ { 25 /^ Languages\{$/, /^ \}$/ {
26 /^ Languages\{$/p 26 /^ Languages\{$/p
27 /^ '${ACCEPT_LANG_PATTERN}'/p 27 /^ '${ACCEPT_LANG_PATTERN}'/p
28 /^ \}$/p 28 /^ \}$/p
29 d 29 d
30 } 30 }
31 /^ Types\{$/,/^ \}$/d 31 /^ Types\{$/,/^ \}$/d
32 /^ Variants\{$/,/^ \}$/d' ${target} 32 /^ Variants\{$/,/^ \}$/d' ${target}
33 33
34 # Delete an empty "Languages" block. Otherwise, getting the display 34 # Delete an empty "Languages" block. Otherwise, getting the display
35 # name for all the languages in a given locale (e.g. en_GB) would fail 35 # name for all the languages in a given locale (e.g. en_GB) would fail
36 # when the above filtering sed command results in an empty "Languages" 36 # when the above filtering sed command results in an empty "Languages"
37 # block. 37 # block.
38 sed -r -i \ 38 sed -r -i \
39 '/^ Languages\{$/ { 39 '/^ Languages\{$/ {
40 N 40 N
41 /^ Languages\{\n \}/ d 41 /^ Languages\{\n \}/ d
42 }' ${target} 42 }' ${target}
43 done 43 done
44 } 44 }
45 45
46 46
47 # Keep only the minimum locale data for non-UI languages. 47 # Keep only the minimum locale data for non-UI languages.
48 function abridge_locale_data_for_non_ui_languages { 48 function abridge_locale_data_for_non_ui_languages {
49 for lang in $(grep -v '^#' chrome_ui_languages.list) 49 for lang in $(grep -v '^#' "${scriptdir}/chrome_ui_languages.list")
50 do 50 do
51 # Set $OP to '|' only if $UI_LANGUAGES is not empty. 51 # Set $OP to '|' only if $UI_LANGUAGES is not empty.
52 OP=${UI_LANGUAGES:+|} 52 OP=${UI_LANGUAGES:+|}
53 UI_LANGUAGES="${UI_LANGUAGES}${OP}${lang}" 53 UI_LANGUAGES="${UI_LANGUAGES}${OP}${lang}"
54 done 54 done
55 55
56 EXTRA_LANGUAGES=$(egrep -v -e '^#' -e "(${UI_LANGUAGES})" accept_lang.list) 56 EXTRA_LANGUAGES=$(egrep -v -e '^#' -e "(${UI_LANGUAGES})" \
57 "${scriptdir}/accept_lang.list")
57 58
58 echo Creating minimum locale data in ${localedatapath} 59 echo Creating minimum locale data in ${localedatapath}
59 for lang in ${EXTRA_LANGUAGES} 60 for lang in ${EXTRA_LANGUAGES}
60 do 61 do
61 target=${localedatapath}/${lang}.txt 62 target=${localedatapath}/${lang}.txt
62 [ -e ${target} ] || { echo "missing ${lang}"; continue; } 63 [ -e ${target} ] || { echo "missing ${lang}"; continue; }
63 echo Overwriting ${target} ... 64 echo Overwriting ${target} ...
64 65
65 # Do not include '%%Parent' line on purpose. 66 # Do not include '%%Parent' line on purpose.
66 sed -n -r -i \ 67 sed -n -r -i \
(...skipping 26 matching lines...) Expand all
93 } 94 }
94 /^\}$/p' ${target} 95 /^\}$/p' ${target}
95 done 96 done
96 } 97 }
97 98
98 # Keep only the currencies used by the largest 150 economies in terms of GDP. 99 # Keep only the currencies used by the largest 150 economies in terms of GDP.
99 # TODO(jshin): Use ucurr_isAvailable in ICU to drop more currencies. 100 # TODO(jshin): Use ucurr_isAvailable in ICU to drop more currencies.
100 # See also http://en.wikipedia.org/wiki/List_of_circulating_currencies 101 # See also http://en.wikipedia.org/wiki/List_of_circulating_currencies
101 function filter_currency_data { 102 function filter_currency_data {
102 unset KEEPLIST 103 unset KEEPLIST
103 for currency in $(grep -v '^#' currencies.list) 104 for currency in $(grep -v '^#' "${scriptdir}/currencies.list")
104 do 105 do
105 OP=${KEEPLIST:+|} 106 OP=${KEEPLIST:+|}
106 KEEPLIST=${KEEPLIST}${OP}${currency} 107 KEEPLIST=${KEEPLIST}${OP}${currency}
107 done 108 done
108 KEEPLIST="(${KEEPLIST})" 109 KEEPLIST="(${KEEPLIST})"
109 110
110 for i in ${dataroot}/curr/*.txt 111 for i in ${dataroot}/curr/*.txt
111 do 112 do
112 locale=$(basename $i .txt) 113 locale=$(basename $i .txt)
113 [ $locale == 'supplementalData' ] && continue; 114 [ $locale == 'supplementalData' ] && continue;
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
173 } 174 }
174 175
175 # big5han and gb2312han collation do not make any sense and nobody uses them. 176 # big5han and gb2312han collation do not make any sense and nobody uses them.
176 function remove_legacy_chinese_codepoint_collation { 177 function remove_legacy_chinese_codepoint_collation {
177 echo "Removing Big5 / GB2312 / UniHan collation data from Chinese locale" 178 echo "Removing Big5 / GB2312 / UniHan collation data from Chinese locale"
178 target="${dataroot}/coll/zh.txt" 179 target="${dataroot}/coll/zh.txt"
179 echo "Overwriting ${target}" 180 echo "Overwriting ${target}"
180 sed -r -i '/^ (uni|big5|gb2312)han\{$/,/^ \}$/ d' ${target} 181 sed -r -i '/^ (uni|big5|gb2312)han\{$/,/^ \}$/ d' ${target}
181 } 182 }
182 183
183 dataroot="$(dirname $0)/../source/data" 184 treeroot="$(dirname "$0")/.."
185 dataroot="${treeroot}/source/data"
186 scriptdir="${treeroot}/scripts"
184 localedatapath="${dataroot}/locales" 187 localedatapath="${dataroot}/locales"
185 langdatapath="${dataroot}/lang" 188 langdatapath="${dataroot}/lang"
186 189
187 190
188 191
189 filter_display_language_names 192 filter_display_language_names
190 abridge_locale_data_for_non_ui_languages 193 abridge_locale_data_for_non_ui_languages
191 filter_currency_data 194 filter_currency_data
192 filter_region_data 195 filter_region_data
193 remove_legacy_chinese_codepoint_collation 196 remove_legacy_chinese_codepoint_collation
194 filter_unit_data 197 filter_unit_data
195 198
196 # Chromium OS needs exemplar cities for timezones, but not Chromium. 199 # Chromium OS needs exemplar cities for timezones, but not Chromium.
197 # It'll save 400kB (uncompressed), but the size difference in 200 # It'll save 400kB (uncompressed), but the size difference in
198 # 7z compressed installer is <= 100kB. 201 # 7z compressed installer is <= 100kB.
199 # TODO(jshin): Make separate data files for CrOS and Chromium. 202 # TODO(jshin): Make separate data files for CrOS and Chromium.
200 #remove_exemplar_cities 203 #remove_exemplar_cities
OLDNEW
« no previous file with comments | « android/patch_locale.sh ('k') | source/data/Makefile.in » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698