| OLD | NEW |
| 1 #!/bin/bash | 1 #!/bin/bash |
| 2 # Copyright (c) 2014 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 | 6 |
| 7 # Remove display names for languages that are not listed in the accept-language | 7 # Remove display names for languages that are not listed in the accept-language |
| 8 # list of Chromium. | 8 # list of Chromium. |
| 9 function filter_display_language_names { | 9 function filter_display_language_names { |
| 10 for lang in $(grep -v '^#' accept_lang.list) | 10 for lang in $(grep -v '^#' accept_lang.list) |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 44 done | 44 done |
| 45 | 45 |
| 46 EXTRA_LANGUAGES=$(egrep -v -e '^#' -e "(${UI_LANGUAGES})" accept_lang.list) | 46 EXTRA_LANGUAGES=$(egrep -v -e '^#' -e "(${UI_LANGUAGES})" accept_lang.list) |
| 47 | 47 |
| 48 echo Creating minimum locale data in ${localedatapath} | 48 echo Creating minimum locale data in ${localedatapath} |
| 49 for lang in ${EXTRA_LANGUAGES} | 49 for lang in ${EXTRA_LANGUAGES} |
| 50 do | 50 do |
| 51 target=${localedatapath}/${lang}.txt | 51 target=${localedatapath}/${lang}.txt |
| 52 [ -e ${target} ] || { echo "missing ${lang}"; continue; } | 52 [ -e ${target} ] || { echo "missing ${lang}"; continue; } |
| 53 echo Overwriting ${target} ... | 53 echo Overwriting ${target} ... |
| 54 |
| 55 # Do not include '%%Parent' line on purpose. |
| 54 sed -n -r -i \ | 56 sed -n -r -i \ |
| 55 '1, /^'${lang}'\{$/p | 57 '1, /^'${lang}'\{$/p |
| 56 /^ "%%ALIAS"\{/p | 58 /^ "%%ALIAS"\{/p |
| 57 /^ AuxExemplarCharacters\{.*\}$/p | 59 /^ AuxExemplarCharacters\{.*\}$/p |
| 58 /^ AuxExemplarCharacters\{$/, /^ \}$/p | 60 /^ AuxExemplarCharacters\{$/, /^ \}$/p |
| 59 /^ ExemplarCharacters\{.*\}$/p | 61 /^ ExemplarCharacters\{.*\}$/p |
| 60 /^ ExemplarCharacters\{$/, /^ \}$/p | 62 /^ ExemplarCharacters\{$/, /^ \}$/p |
| 61 /^ (LocaleScript|layout)\{$/, /^ \}$/p | 63 /^ (LocaleScript|layout)\{$/, /^ \}$/p |
| 62 /^ Version\{.*$/p | 64 /^ Version\{.*$/p |
| 63 /^\}$/p' ${target} | 65 /^\}$/p' ${target} |
| 64 done | 66 done |
| 65 | 67 |
| 66 echo Creating minimum locale data in ${langdatapath} | 68 echo Creating minimum locale data in ${langdatapath} |
| 67 for lang in ${EXTRA_LANGUAGES} | 69 for lang in ${EXTRA_LANGUAGES} |
| 68 do | 70 do |
| 69 target=${langdatapath}/${lang}.txt | 71 target=${langdatapath}/${lang}.txt |
| 70 [ -e ${target} ] || { echo "missing ${lang}"; continue; } | 72 [ -e ${target} ] || { echo "missing ${lang}"; continue; } |
| 71 echo Overwriting ${target} ... | 73 echo Overwriting ${target} ... |
| 74 |
| 75 # Do not include '%%Parent' line on purpose. |
| 72 sed -n -r -i \ | 76 sed -n -r -i \ |
| 73 '1, /^'${lang}'\{$/p | 77 '1, /^'${lang}'\{$/p |
| 78 /^ "%%ALIAS"\{/p |
| 74 /^ Languages\{$/, /^ \}$/ { | 79 /^ Languages\{$/, /^ \}$/ { |
| 75 /^ Languages\{$/p | 80 /^ Languages\{$/p |
| 76 /^ '${lang}'\{.*\}$/p | 81 /^ '${lang}'\{.*\}$/p |
| 77 /^ \}$/p | 82 /^ \}$/p |
| 78 } | 83 } |
| 79 /^\}$/p' ${target} | 84 /^\}$/p' ${target} |
| 80 done | 85 done |
| 81 } | 86 } |
| 82 | 87 |
| 83 # Drop historic currencies. | 88 # Keep only the currencies used by the 150 largest economies in terms of GDP. |
| 84 # TODO(jshin): Use ucurr_isAvailable in ICU to drop more currencies. | 89 # TODO(jshin): Use ucurr_isAvailable in ICU to drop more currencies. |
| 85 # See also http://en.wikipedia.org/wiki/List_of_circulating_currencies | 90 # See also http://en.wikipedia.org/wiki/List_of_circulating_currencies |
| 86 function filter_currency_data { | 91 function filter_currency_data { |
| 87 for currency in $(grep -v '^#' currencies_to_drop.list) | 92 unset KEEPLIST |
| 93 for currency in $(grep -v '^#' currencies.list) |
| 88 do | 94 do |
| 89 OP=${DROPLIST:+|} | 95 OP=${KEEPLIST:+|} |
| 90 DROPLIST=${DROPLIST}${OP}${currency} | 96 KEEPLIST=${KEEPLIST}${OP}${currency} |
| 91 done | 97 done |
| 92 DROPLIST="(${DROPLIST})\{" | 98 KEEPLIST="(${KEEPLIST})" |
| 93 | 99 |
| 94 cd "${dataroot}/curr" | 100 for i in ${dataroot}/curr/*.txt |
| 95 for i in *.txt | |
| 96 do | 101 do |
| 97 [ $i != 'supplementalData.txt' ] && \ | 102 locale=$(basename $i .txt) |
| 98 sed -r -i '/^ '$DROPLIST'/, /^ }/ d' $i | 103 [ $locale == 'supplementalData' ] && continue; |
| 104 echo "Overwriting $i for $locale" |
| 105 sed -n -r -i \ |
| 106 '1, /^'${locale}'\{$/ p |
| 107 /^ "%%ALIAS"\{/p |
| 108 /^ %%Parent\{/p |
| 109 /^ Currencies\{$/, /^ \}$/ { |
| 110 /^ Currencies\{$/ p |
| 111 /^ '$KEEPLIST'\{$/, /^ \}$/ p |
| 112 /^ \}$/ p |
| 113 } |
| 114 /^ Currencies%narrow\{$/, /^ \}$/ { |
| 115 /^ Currencies%narrow\{$/ p |
| 116 /^ '$KEEPLIST'\{".*\}$/ p |
| 117 /^ \}$/ p |
| 118 } |
| 119 /^ CurrencyPlurals\{$/, /^ \}$/ { |
| 120 /^ CurrencyPlurals\{$/ p |
| 121 /^ '$KEEPLIST'\{$/, /^ \}$/ p |
| 122 /^ \}$/ p |
| 123 } |
| 124 /^ [cC]urrency(Map|Meta|Spacing|UnitPatterns)\{$/, /^ \}$/ p |
| 125 /^ Version\{.*\}$/p |
| 126 /^\}$/p' $i |
| 99 done | 127 done |
| 100 } | 128 } |
| 101 | 129 |
| 102 # Remove the display names for numeric region codes other than | 130 # Remove the display names for numeric region codes other than |
| 103 # 419 (Latin America) because we don't use them. | 131 # 419 (Latin America) because we don't use them. |
| 104 function filter_region_data { | 132 function filter_region_data { |
| 105 cd "${dataroot}/region" | 133 sed -i '/[0-35-9][0-9][0-9]{/ d' ${dataroot}/region/*.txt |
| 106 sed -i '/[0-35-9][0-9][0-9]{/ d' *.txt | |
| 107 } | 134 } |
| 108 | 135 |
| 109 | 136 |
| 110 | 137 |
| 111 function remove_exemplar_cities { | 138 function remove_exemplar_cities { |
| 112 cd "${dataroot}/zone" | 139 for i in ${dataroot}/zone/*.txt |
| 113 for i in *.txt | |
| 114 do | 140 do |
| 115 [ $i != 'root.txt' ] && \ | 141 [ $i != 'root.txt' ] && \ |
| 116 sed -i '/^ zoneStrings/, /^ "meta:/ { | 142 sed -i '/^ zoneStrings/, /^ "meta:/ { |
| 117 /^ zoneStrings/ p | 143 /^ zoneStrings/ p |
| 118 /^ "meta:/ p | 144 /^ "meta:/ p |
| 119 d | 145 d |
| 120 }' $i | 146 }' $i |
| 121 done | 147 done |
| 122 } | 148 } |
| 123 | 149 |
| 124 # Keep only duration and compound in units* sections. | 150 # Keep only duration and compound in units* sections. |
| 125 function filter_locale_data { | 151 function filter_unit_data { |
| 126 for i in ${dataroot}/locales/*.txt | 152 for i in ${dataroot}/unit/*.txt |
| 127 do | 153 do |
| 128 echo Overwriting $i ... | 154 echo Overwriting $i ... |
| 129 sed -r -i \ | 155 sed -r -i \ |
| 130 '/^ units(|Narrow|Short)\{$/, /^ \}$/ { | 156 '/^ units(|Narrow|Short)\{$/, /^ \}$/ { |
| 131 /^ units(|Narrow|Short)\{$/ p | 157 /^ units(|Narrow|Short)\{$/ p |
| 132 /^ (duration|compound)\{$/, /^ \}$/ p | 158 /^ (duration|compound)\{$/, /^ \}$/ p |
| 133 /^ \}$/ p | 159 /^ \}$/ p |
| 134 d | 160 d |
| 135 }' ${i} | 161 }' ${i} |
| 136 done | 162 done |
| 137 } | 163 } |
| 138 | 164 |
| 139 # big5han and gb2312han collation do not make any sense and nobody uses them. | 165 # big5han and gb2312han collation do not make any sense and nobody uses them; unihan collation is removed as well. |
| 140 function remove_legacy_chinese_codepoint_collation { | 166 function remove_legacy_chinese_codepoint_collation { |
| 141 echo "Removing Big5 / GB2312 collation data from Chinese locale" | 167 echo "Removing Big5 / GB2312 / UniHan collation data from Chinese locale" |
| 142 target="${dataroot}/coll/zh.txt" | 168 target="${dataroot}/coll/zh.txt" |
| 143 echo "Overwriting ${target}" | 169 echo "Overwriting ${target}" |
| 144 sed -r -i '/^ (big5|gb2312)han\{$/,/^ \}$/ d' ${target} | 170 sed -r -i '/^ (uni|big5|gb2312)han\{$/,/^ \}$/ d' ${target} |
| 145 } | 171 } |
| 146 | 172 |
| 147 dataroot="$(dirname $0)/../source/data" | 173 dataroot="$(dirname $0)/../source/data" |
| 148 localedatapath="${dataroot}/locales" | 174 localedatapath="${dataroot}/locales" |
| 149 langdatapath="${dataroot}/lang" | 175 langdatapath="${dataroot}/lang" |
| 150 | 176 |
| 151 | 177 |
| 152 | 178 |
| 153 filter_display_language_names | 179 filter_display_language_names |
| 154 abridge_locale_data_for_non_ui_languages | 180 abridge_locale_data_for_non_ui_languages |
| 155 filter_currency_data | 181 filter_currency_data |
| 156 filter_region_data | 182 filter_region_data |
| 157 remove_legacy_chinese_codepoint_collation | 183 remove_legacy_chinese_codepoint_collation |
| 158 filter_locale_data | 184 filter_unit_data |
| 159 | 185 |
| 160 # Chromium OS needs exemplar cities for timezones, but not Chromium. | 186 # Chromium OS needs exemplar cities for timezones, but not Chromium. |
| 161 # It'll save 400kB (uncompressed), but the size difference in | 187 # It'll save 400kB (uncompressed), but the size difference in |
| 162 # 7z compressed installer is <= 100kB. | 188 # 7z compressed installer is <= 100kB. |
| 163 # TODO(jshin): Make separate data files for CrOS and Chromium. | 189 # TODO(jshin): Make separate data files for CrOS and Chromium. |
| 164 #remove_exemplar_cities | 190 #remove_exemplar_cities |
| OLD | NEW |