Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(474)

Side by Side Diff: scripts/trim_data.sh

Issue 872903002: ICU update to 54 - step 6 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@step5
Patch Set: trailing ws removed in 5 lines Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « scripts/currencies_to_drop.list ('k') | source/data/coll/collocal.mk » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 #!/bin/bash 1 #!/bin/bash
2 # Copyright (c) 2014 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 6
7 # Remove display names for languages that are not listed in the accept-language 7 # Remove display names for languages that are not listed in the accept-language
8 # list of Chromium. 8 # list of Chromium.
9 function filter_display_language_names { 9 function filter_display_language_names {
10 for lang in $(grep -v '^#' accept_lang.list) 10 for lang in $(grep -v '^#' accept_lang.list)
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
44 done 44 done
45 45
46 EXTRA_LANGUAGES=$(egrep -v -e '^#' -e "(${UI_LANGUAGES})" accept_lang.list) 46 EXTRA_LANGUAGES=$(egrep -v -e '^#' -e "(${UI_LANGUAGES})" accept_lang.list)
47 47
48 echo Creating minimum locale data in ${localedatapath} 48 echo Creating minimum locale data in ${localedatapath}
49 for lang in ${EXTRA_LANGUAGES} 49 for lang in ${EXTRA_LANGUAGES}
50 do 50 do
51 target=${localedatapath}/${lang}.txt 51 target=${localedatapath}/${lang}.txt
52 [ -e ${target} ] || { echo "missing ${lang}"; continue; } 52 [ -e ${target} ] || { echo "missing ${lang}"; continue; }
53 echo Overwriting ${target} ... 53 echo Overwriting ${target} ...
54
55 # Intentionally omit the '%%Parent' line.
54 sed -n -r -i \ 56 sed -n -r -i \
55 '1, /^'${lang}'\{$/p 57 '1, /^'${lang}'\{$/p
56 /^ "%%ALIAS"\{/p 58 /^ "%%ALIAS"\{/p
57 /^ AuxExemplarCharacters\{.*\}$/p 59 /^ AuxExemplarCharacters\{.*\}$/p
58 /^ AuxExemplarCharacters\{$/, /^ \}$/p 60 /^ AuxExemplarCharacters\{$/, /^ \}$/p
59 /^ ExemplarCharacters\{.*\}$/p 61 /^ ExemplarCharacters\{.*\}$/p
60 /^ ExemplarCharacters\{$/, /^ \}$/p 62 /^ ExemplarCharacters\{$/, /^ \}$/p
61 /^ (LocaleScript|layout)\{$/, /^ \}$/p 63 /^ (LocaleScript|layout)\{$/, /^ \}$/p
62 /^ Version\{.*$/p 64 /^ Version\{.*$/p
63 /^\}$/p' ${target} 65 /^\}$/p' ${target}
64 done 66 done
65 67
66 echo Creating minimum locale data in ${langdatapath} 68 echo Creating minimum locale data in ${langdatapath}
67 for lang in ${EXTRA_LANGUAGES} 69 for lang in ${EXTRA_LANGUAGES}
68 do 70 do
69 target=${langdatapath}/${lang}.txt 71 target=${langdatapath}/${lang}.txt
70 [ -e ${target} ] || { echo "missing ${lang}"; continue; } 72 [ -e ${target} ] || { echo "missing ${lang}"; continue; }
71 echo Overwriting ${target} ... 73 echo Overwriting ${target} ...
74
75 # Intentionally omit the '%%Parent' line.
72 sed -n -r -i \ 76 sed -n -r -i \
73 '1, /^'${lang}'\{$/p 77 '1, /^'${lang}'\{$/p
78 /^ "%%ALIAS"\{/p
74 /^ Languages\{$/, /^ \}$/ { 79 /^ Languages\{$/, /^ \}$/ {
75 /^ Languages\{$/p 80 /^ Languages\{$/p
76 /^ '${lang}'\{.*\}$/p 81 /^ '${lang}'\{.*\}$/p
77 /^ \}$/p 82 /^ \}$/p
78 } 83 }
79 /^\}$/p' ${target} 84 /^\}$/p' ${target}
80 done 85 done
81 } 86 }
82 87
83 # Drop historic currencies. 88 # Keep only the currencies used by the largest 150 economies in terms of GDP.
84 # TODO(jshin): Use ucurr_isAvailable in ICU to drop more currencies. 89 # TODO(jshin): Use ucurr_isAvailable in ICU to drop more currencies.
85 # See also http://en.wikipedia.org/wiki/List_of_circulating_currencies 90 # See also http://en.wikipedia.org/wiki/List_of_circulating_currencies
86 function filter_currency_data { 91 function filter_currency_data {
87 for currency in $(grep -v '^#' currencies_to_drop.list) 92 unset KEEPLIST
93 for currency in $(grep -v '^#' currencies.list)
88 do 94 do
89 OP=${DROPLIST:+|} 95 OP=${KEEPLIST:+|}
90 DROPLIST=${DROPLIST}${OP}${currency} 96 KEEPLIST=${KEEPLIST}${OP}${currency}
91 done 97 done
92 DROPLIST="(${DROPLIST})\{" 98 KEEPLIST="(${KEEPLIST})"
93 99
94 cd "${dataroot}/curr" 100 for i in ${dataroot}/curr/*.txt
95 for i in *.txt
96 do 101 do
97 [ $i != 'supplementalData.txt' ] && \ 102 locale=$(basename $i .txt)
98 sed -r -i '/^ '$DROPLIST'/, /^ }/ d' $i 103 [ $locale == 'supplementalData' ] && continue;
104 echo "Overwriting $i for $locale"
105 sed -n -r -i \
106 '1, /^'${locale}'\{$/ p
107 /^ "%%ALIAS"\{/p
108 /^ %%Parent\{/p
109 /^ Currencies\{$/, /^ \}$/ {
110 /^ Currencies\{$/ p
111 /^ '$KEEPLIST'\{$/, /^ \}$/ p
112 /^ \}$/ p
113 }
114 /^ Currencies%narrow\{$/, /^ \}$/ {
115 /^ Currencies%narrow\{$/ p
116 /^ '$KEEPLIST'\{".*\}$/ p
117 /^ \}$/ p
118 }
119 /^ CurrencyPlurals\{$/, /^ \}$/ {
120 /^ CurrencyPlurals\{$/ p
121 /^ '$KEEPLIST'\{$/, /^ \}$/ p
122 /^ \}$/ p
123 }
124 /^ [cC]urrency(Map|Meta|Spacing|UnitPatterns)\{$/, /^ \}$/ p
125 /^ Version\{.*\}$/p
126 /^\}$/p' $i
99 done 127 done
100 } 128 }
101 129
102 # Remove the display names for numeric region codes other than 130 # Remove the display names for numeric region codes other than
103 # 419 (Latin America) because we don't use them. 131 # 419 (Latin America) because we don't use them.
104 function filter_region_data { 132 function filter_region_data {
105 cd "${dataroot}/region" 133 sed -i '/[0-35-9][0-9][0-9]{/ d' ${dataroot}/region/*.txt
106 sed -i '/[0-35-9][0-9][0-9]{/ d' *.txt
107 } 134 }
108 135
109 136
110 137
111 function remove_exemplar_cities { 138 function remove_exemplar_cities {
112 cd "${dataroot}/zone" 139 for i in ${dataroot}/zone/*.txt
113 for i in *.txt
114 do 140 do
115 [ $i != 'root.txt' ] && \ 141 [ $i != 'root.txt' ] && \
116 sed -i '/^ zoneStrings/, /^ "meta:/ { 142 sed -i '/^ zoneStrings/, /^ "meta:/ {
117 /^ zoneStrings/ p 143 /^ zoneStrings/ p
118 /^ "meta:/ p 144 /^ "meta:/ p
119 d 145 d
120 }' $i 146 }' $i
121 done 147 done
122 } 148 }
123 149
124 # Keep only duration and compound in units* sections. 150 # Keep only duration and compound in units* sections.
125 function filter_locale_data { 151 function filter_unit_data {
126 for i in ${dataroot}/locales/*.txt 152 for i in ${dataroot}/unit/*.txt
127 do 153 do
128 echo Overwriting $i ... 154 echo Overwriting $i ...
129 sed -r -i \ 155 sed -r -i \
130 '/^ units(|Narrow|Short)\{$/, /^ \}$/ { 156 '/^ units(|Narrow|Short)\{$/, /^ \}$/ {
131 /^ units(|Narrow|Short)\{$/ p 157 /^ units(|Narrow|Short)\{$/ p
132 /^ (duration|compound)\{$/, /^ \}$/ p 158 /^ (duration|compound)\{$/, /^ \}$/ p
133 /^ \}$/ p 159 /^ \}$/ p
134 d 160 d
135 }' ${i} 161 }' ${i}
136 done 162 done
137 } 163 }
138 164
139 # big5han and gb2312han collation do not make any sense and nobody uses them. 165 # big5han and gb2312han collation do not make any sense and nobody uses them; unihan collation is removed along with them.
140 function remove_legacy_chinese_codepoint_collation { 166 function remove_legacy_chinese_codepoint_collation {
141 echo "Removing Big5 / GB2312 collation data from Chinese locale" 167 echo "Removing Big5 / GB2312 / UniHan collation data from Chinese locale"
142 target="${dataroot}/coll/zh.txt" 168 target="${dataroot}/coll/zh.txt"
143 echo "Overwriting ${target}" 169 echo "Overwriting ${target}"
144 sed -r -i '/^ (big5|gb2312)han\{$/,/^ \}$/ d' ${target} 170 sed -r -i '/^ (uni|big5|gb2312)han\{$/,/^ \}$/ d' ${target}
145 } 171 }
146 172
147 dataroot="$(dirname $0)/../source/data" 173 dataroot="$(dirname $0)/../source/data"
148 localedatapath="${dataroot}/locales" 174 localedatapath="${dataroot}/locales"
149 langdatapath="${dataroot}/lang" 175 langdatapath="${dataroot}/lang"
150 176
151 177
152 178
153 filter_display_language_names 179 filter_display_language_names
154 abridge_locale_data_for_non_ui_languages 180 abridge_locale_data_for_non_ui_languages
155 filter_currency_data 181 filter_currency_data
156 filter_region_data 182 filter_region_data
157 remove_legacy_chinese_codepoint_collation 183 remove_legacy_chinese_codepoint_collation
158 filter_locale_data 184 filter_unit_data
159 185
160 # Chromium OS needs exemplar cities for timezones, but not Chromium. 186 # Chromium OS needs exemplar cities for timezones, but not Chromium.
161 # It'll save 400kB (uncompressed), but the size difference in 187 # It'll save 400kB (uncompressed), but the size difference in
162 # 7z compressed installer is <= 100kB. 188 # 7z compressed installer is <= 100kB.
163 # TODO(jshin): Make separate data files for CrOS and Chromium. 189 # TODO(jshin): Make separate data files for CrOS and Chromium.
164 #remove_exemplar_cities 190 #remove_exemplar_cities
OLD | NEW
« no previous file with comments | « scripts/currencies_to_drop.list ('k') | source/data/coll/collocal.mk » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698