OLD | NEW |
1 #!/bin/bash | 1 #!/bin/bash |
2 # Copyright (c) 2014 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 | 6 |
7 # Remove display names for languages that are not listed in the accept-language | 7 # Remove display names for languages that are not listed in the accept-language |
8 # list of Chromium. | 8 # list of Chromium. |
9 function filter_display_language_names { | 9 function filter_display_language_names { |
10 for lang in $(grep -v '^#' accept_lang.list) | 10 for lang in $(grep -v '^#' accept_lang.list) |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
44 done | 44 done |
45 | 45 |
46 EXTRA_LANGUAGES=$(egrep -v -e '^#' -e "(${UI_LANGUAGES})" accept_lang.list) | 46 EXTRA_LANGUAGES=$(egrep -v -e '^#' -e "(${UI_LANGUAGES})" accept_lang.list) |
47 | 47 |
48 echo Creating minimum locale data in ${localedatapath} | 48 echo Creating minimum locale data in ${localedatapath} |
49 for lang in ${EXTRA_LANGUAGES} | 49 for lang in ${EXTRA_LANGUAGES} |
50 do | 50 do |
51 target=${localedatapath}/${lang}.txt | 51 target=${localedatapath}/${lang}.txt |
52 [ -e ${target} ] || { echo "missing ${lang}"; continue; } | 52 [ -e ${target} ] || { echo "missing ${lang}"; continue; } |
53 echo Overwriting ${target} ... | 53 echo Overwriting ${target} ... |
| 54 |
| 55 # Do not include '%%Parent' line on purpose. |
54 sed -n -r -i \ | 56 sed -n -r -i \ |
55 '1, /^'${lang}'\{$/p | 57 '1, /^'${lang}'\{$/p |
56 /^ "%%ALIAS"\{/p | 58 /^ "%%ALIAS"\{/p |
57 /^ AuxExemplarCharacters\{.*\}$/p | 59 /^ AuxExemplarCharacters\{.*\}$/p |
58 /^ AuxExemplarCharacters\{$/, /^ \}$/p | 60 /^ AuxExemplarCharacters\{$/, /^ \}$/p |
59 /^ ExemplarCharacters\{.*\}$/p | 61 /^ ExemplarCharacters\{.*\}$/p |
60 /^ ExemplarCharacters\{$/, /^ \}$/p | 62 /^ ExemplarCharacters\{$/, /^ \}$/p |
61 /^ (LocaleScript|layout)\{$/, /^ \}$/p | 63 /^ (LocaleScript|layout)\{$/, /^ \}$/p |
62 /^ Version\{.*$/p | 64 /^ Version\{.*$/p |
63 /^\}$/p' ${target} | 65 /^\}$/p' ${target} |
64 done | 66 done |
65 | 67 |
66 echo Creating minimum locale data in ${langdatapath} | 68 echo Creating minimum locale data in ${langdatapath} |
67 for lang in ${EXTRA_LANGUAGES} | 69 for lang in ${EXTRA_LANGUAGES} |
68 do | 70 do |
69 target=${langdatapath}/${lang}.txt | 71 target=${langdatapath}/${lang}.txt |
70 [ -e ${target} ] || { echo "missing ${lang}"; continue; } | 72 [ -e ${target} ] || { echo "missing ${lang}"; continue; } |
71 echo Overwriting ${target} ... | 73 echo Overwriting ${target} ... |
| 74 |
| 75 # Do not include '%%Parent' line on purpose. |
72 sed -n -r -i \ | 76 sed -n -r -i \ |
73 '1, /^'${lang}'\{$/p | 77 '1, /^'${lang}'\{$/p |
| 78 /^ "%%ALIAS"\{/p |
74 /^ Languages\{$/, /^ \}$/ { | 79 /^ Languages\{$/, /^ \}$/ { |
75 /^ Languages\{$/p | 80 /^ Languages\{$/p |
76 /^ '${lang}'\{.*\}$/p | 81 /^ '${lang}'\{.*\}$/p |
77 /^ \}$/p | 82 /^ \}$/p |
78 } | 83 } |
79 /^\}$/p' ${target} | 84 /^\}$/p' ${target} |
80 done | 85 done |
81 } | 86 } |
82 | 87 |
83 # Drop historic currencies. | 88 # Keep only the currencies used by the largest 150 economies in terms of GDP. |
84 # TODO(jshin): Use ucurr_isAvailable in ICU to drop more currencies. | 89 # TODO(jshin): Use ucurr_isAvailable in ICU to drop more currencies. |
85 # See also http://en.wikipedia.org/wiki/List_of_circulating_currencies | 90 # See also http://en.wikipedia.org/wiki/List_of_circulating_currencies |
86 function filter_currency_data { | 91 function filter_currency_data { |
87 for currency in $(grep -v '^#' currencies_to_drop.list) | 92 unset KEEPLIST |
| 93 for currency in $(grep -v '^#' currencies.list) |
88 do | 94 do |
89 OP=${DROPLIST:+|} | 95 OP=${KEEPLIST:+|} |
90 DROPLIST=${DROPLIST}${OP}${currency} | 96 KEEPLIST=${KEEPLIST}${OP}${currency} |
91 done | 97 done |
92 DROPLIST="(${DROPLIST})\{" | 98 KEEPLIST="(${KEEPLIST})" |
93 | 99 |
94 cd "${dataroot}/curr" | 100 for i in ${dataroot}/curr/*.txt |
95 for i in *.txt | |
96 do | 101 do |
97 [ $i != 'supplementalData.txt' ] && \ | 102 locale=$(basename $i .txt) |
98 sed -r -i '/^ '$DROPLIST'/, /^ }/ d' $i | 103 [ $locale == 'supplementalData' ] && continue; |
| 104 echo "Overwriting $i for $locale" |
| 105 sed -n -r -i \ |
| 106 '1, /^'${locale}'\{$/ p |
| 107 /^ "%%ALIAS"\{/p |
| 108 /^ %%Parent\{/p |
| 109 /^ Currencies\{$/, /^ \}$/ { |
| 110 /^ Currencies\{$/ p |
| 111 /^ '$KEEPLIST'\{$/, /^ \}$/ p |
| 112 /^ \}$/ p |
| 113 } |
| 114 /^ Currencies%narrow\{$/, /^ \}$/ { |
| 115 /^ Currencies%narrow\{$/ p |
| 116 /^ '$KEEPLIST'\{".*\}$/ p |
| 117 /^ \}$/ p |
| 118 } |
| 119 /^ CurrencyPlurals\{$/, /^ \}$/ { |
| 120 /^ CurrencyPlurals\{$/ p |
| 121 /^ '$KEEPLIST'\{$/, /^ \}$/ p |
| 122 /^ \}$/ p |
| 123 } |
| 124 /^ [cC]urrency(Map|Meta|Spacing|UnitPatterns)\{$/, /^ \}$/ p |
| 125 /^ Version\{.*\}$/p |
| 126 /^\}$/p' $i |
99 done | 127 done |
100 } | 128 } |
101 | 129 |
102 # Remove the display names for numeric region codes other than | 130 # Remove the display names for numeric region codes other than |
103 # 419 (Latin America) because we don't use them. | 131 # 419 (Latin America) because we don't use them. |
104 function filter_region_data { | 132 function filter_region_data { |
105 cd "${dataroot}/region" | 133 sed -i '/[0-35-9][0-9][0-9]{/ d' ${dataroot}/region/*.txt |
106 sed -i '/[0-35-9][0-9][0-9]{/ d' *.txt | |
107 } | 134 } |
108 | 135 |
109 | 136 |
110 | 137 |
111 function remove_exemplar_cities { | 138 function remove_exemplar_cities { |
112 cd "${dataroot}/zone" | 139 for i in ${dataroot}/zone/*.txt |
113 for i in *.txt | |
114 do | 140 do |
115 [ $i != 'root.txt' ] && \ | 141 [ $i != 'root.txt' ] && \ |
116 sed -i '/^ zoneStrings/, /^ "meta:/ { | 142 sed -i '/^ zoneStrings/, /^ "meta:/ { |
117 /^ zoneStrings/ p | 143 /^ zoneStrings/ p |
118 /^ "meta:/ p | 144 /^ "meta:/ p |
119 d | 145 d |
120 }' $i | 146 }' $i |
121 done | 147 done |
122 } | 148 } |
123 | 149 |
124 # Keep only duration and compound in units* sections. | 150 # Keep only duration and compound in units* sections. |
125 function filter_locale_data { | 151 function filter_unit_data { |
126 for i in ${dataroot}/locales/*.txt | 152 for i in ${dataroot}/unit/*.txt |
127 do | 153 do |
128 echo Overwriting $i ... | 154 echo Overwriting $i ... |
129 sed -r -i \ | 155 sed -r -i \ |
130 '/^ units(|Narrow|Short)\{$/, /^ \}$/ { | 156 '/^ units(|Narrow|Short)\{$/, /^ \}$/ { |
131 /^ units(|Narrow|Short)\{$/ p | 157 /^ units(|Narrow|Short)\{$/ p |
132 /^ (duration|compound)\{$/, /^ \}$/ p | 158 /^ (duration|compound)\{$/, /^ \}$/ p |
133 /^ \}$/ p | 159 /^ \}$/ p |
134 d | 160 d |
135 }' ${i} | 161 }' ${i} |
136 done | 162 done |
137 } | 163 } |
138 | 164 |
139 # big5han and gb2312han collation do not make any sense and nobody uses them. | 165 # big5han and gb2312han collation do not make any sense and nobody uses them. |
140 function remove_legacy_chinese_codepoint_collation { | 166 function remove_legacy_chinese_codepoint_collation { |
141 echo "Removing Big5 / GB2312 collation data from Chinese locale" | 167 echo "Removing Big5 / GB2312 / UniHan collation data from Chinese locale" |
142 target="${dataroot}/coll/zh.txt" | 168 target="${dataroot}/coll/zh.txt" |
143 echo "Overwriting ${target}" | 169 echo "Overwriting ${target}" |
144 sed -r -i '/^ (big5|gb2312)han\{$/,/^ \}$/ d' ${target} | 170 sed -r -i '/^ (uni|big5|gb2312)han\{$/,/^ \}$/ d' ${target} |
145 } | 171 } |
146 | 172 |
147 dataroot="$(dirname $0)/../source/data" | 173 dataroot="$(dirname $0)/../source/data" |
148 localedatapath="${dataroot}/locales" | 174 localedatapath="${dataroot}/locales" |
149 langdatapath="${dataroot}/lang" | 175 langdatapath="${dataroot}/lang" |
150 | 176 |
151 | 177 |
152 | 178 |
153 filter_display_language_names | 179 filter_display_language_names |
154 abridge_locale_data_for_non_ui_languages | 180 abridge_locale_data_for_non_ui_languages |
155 filter_currency_data | 181 filter_currency_data |
156 filter_region_data | 182 filter_region_data |
157 remove_legacy_chinese_codepoint_collation | 183 remove_legacy_chinese_codepoint_collation |
158 filter_locale_data | 184 filter_unit_data |
159 | 185 |
160 # Chromium OS needs exemplar cities for timezones, but not Chromium. | 186 # Chromium OS needs exemplar cities for timezones, but not Chromium. |
161 # It'll save 400kB (uncompressed), but the size difference in | 187 # It'll save 400kB (uncompressed), but the size difference in |
162 # 7z compressed installer is <= 100kB. | 188 # 7z compressed installer is <= 100kB. |
163 # TODO(jshin): Make separate data files for CrOS and Chromium. | 189 # TODO(jshin): Make separate data files for CrOS and Chromium. |
164 #remove_exemplar_cities | 190 #remove_exemplar_cities |
OLD | NEW |