Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2013 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """ | |
| 7 localize.py -- Generates an output file from the given template replacing | |
| 8 variables and localizing strings. | |
| 9 | |
| 10 The script uses Jinja2 template processing library (src/third_party/jinja2). | |
| 11 Variables available to the templates: | |
| 12 - |languages| - the list of languages passed on the command line. ('-l'). | |
| 13 - Each KEY=VALUE define ('-d') can be accessed as |KEY|. | |
| 14 - |official_build| is set to '1' when CHROME_BUILD_TYPE environment variable | |
| 15 is set to "_official". | |
| 16 | |
| 17 Filters: | |
| 18 - GetCodepage - returns the code page for the given language. | |
| 19 - GetCodepageDecimal same as GetCodepage, but returns a decimal value. | |
| 20 - GetLangId - returns Win32 LANGID. | |
| 21 - GetPrimaryLanguage - returns a named Win32 constant specifying the primary | |
| 22 language ID. | |
| 23 - GetSublanguage - returns a named Win32 constant specifying the sublanguage | |
| 24 ID. | |
| 25 | |
| 26 Globals: | |
| 27 - SelectLanguage(language) - selects the language to be used by | |
| 28 {% trans %}{% endtrans %} statements. | |
| 29 | |
| 30 """ | |
| 31 | |
| 32 import io | |
| 33 import json | |
| 34 from optparse import OptionParser | |
| 35 import os | |
| 36 import sys | |
| 37 from string import Template | |
| 38 | |
| 39 ''' | |
| 40 See https://code.google.com/p/grit-i18n/source/browse/trunk/grit/format/rc.py | |
| 41 (revision r79) | |
| 42 | |
| 43 This dictionary defines the language lookup table, which is used for replacing | |
| 44 the GRIT expand variables for language info in Product Version resource. The key | |
| 45 is the ISO language code, and the value specifies the corresponding | |
| 46 locale identifier, code page, primary language and sublanguage. | |
| 47 | |
| 48 LCID resource: http://msdn.microsoft.com/en-us/library/ms776294.aspx | |
| 49 Codepage resource: http://www.science.co.il/language/locale-codes.asp | |
| 50 Language ID resource: http://msdn.microsoft.com/en-us/library/ms776294.aspx | |
| 51 | |
| 52 There is no appropriate sublang for Spanish (Latin America) [es-419], so we | |
| 53 use Mexico. SUBLANG_DEFAULT would incorrectly map to Spain. Unlike other | |
| 54 Latin American countries, Mexican Spanish is supported by VERSIONINFO: | |
| 55 http://msdn.microsoft.com/en-us/library/aa381058.aspx | |
| 56 | |
| 57 ''' | |
| 58 | |
# Maps a language code to [LCID, codepage, primary language, sublanguage].
# The LCID and the codepage are four-digit hexadecimal strings; the last two
# items are the names of Win32 LANG_* / SUBLANG_* constants.
#
# The two pseudo-languages come first; the remaining entries are sorted by
# language code (a code review of this table asked for a defined order).
_LANGUAGE_LANGID = {
  # Language neutral LCID, unicode(1200) code page.
  'neutral': ['0000', '04b0', 'LANG_NEUTRAL', 'SUBLANG_NEUTRAL'],
  # LANG_USER_DEFAULT LCID, unicode(1200) code page.
  'userdefault': ['0400', '04b0', 'LANG_NEUTRAL', 'SUBLANG_DEFAULT'],

  'af': ['0436', '04e4', 'LANG_AFRIKAANS', 'SUBLANG_DEFAULT'],
  # No codepage for Amharic, use unicode(1200). >= Vista.
  'am': ['045e', '04b0', 'LANG_AMHARIC', 'SUBLANG_DEFAULT'],
  'ar': ['0401', '04e8', 'LANG_ARABIC', 'SUBLANG_DEFAULT'],
  'bg': ['0402', '04e3', 'LANG_BULGARIAN', 'SUBLANG_DEFAULT'],
  # No codepage for Bengali, use unicode(1200).
  'bn': ['0445', '04b0', 'LANG_BENGALI', 'SUBLANG_DEFAULT'],
  'ca': ['0403', '04e4', 'LANG_CATALAN', 'SUBLANG_DEFAULT'],
  'cs': ['0405', '04e2', 'LANG_CZECH', 'SUBLANG_DEFAULT'],
  'da': ['0406', '04e4', 'LANG_DANISH', 'SUBLANG_DEFAULT'],
  'de': ['0407', '04e4', 'LANG_GERMAN', 'SUBLANG_GERMAN'],
  'el': ['0408', '04e5', 'LANG_GREEK', 'SUBLANG_DEFAULT'],
  'en': ['0409', '04b0', 'LANG_ENGLISH', 'SUBLANG_ENGLISH_US'],
  # NOTE(review): 0x040e is 1038 decimal, unlike the 04e4 (1252) used by the
  # other Western European entries. Kept as found -- verify against grit.
  'en-GB': ['0809', '040e', 'LANG_ENGLISH', 'SUBLANG_ENGLISH_UK'],
  'es': ['040a', '04e4', 'LANG_SPANISH', 'SUBLANG_SPANISH_MODERN'],
  # LCID for Mexico; Windows does not support L.A. LCID.
  'es-419': ['080a', '04e4', 'LANG_SPANISH', 'SUBLANG_SPANISH_MEXICAN'],
  'et': ['0425', '04e9', 'LANG_ESTONIAN', 'SUBLANG_DEFAULT'],
  'eu': ['042d', '04e4', 'LANG_BASQUE', 'SUBLANG_DEFAULT'],
  'fa': ['0429', '04e8', 'LANG_PERSIAN', 'SUBLANG_DEFAULT'],
  'fake-bidi': ['040d', '04e7', 'LANG_HEBREW', 'SUBLANG_DEFAULT'],
  'fi': ['040b', '04e4', 'LANG_FINNISH', 'SUBLANG_DEFAULT'],
  # NOTE(review): the upstream comment reads "No codepage for filipino, use
  # unicode(1200)", yet the entry carries 04e4, and the primary language is
  # the raw value '100' rather than a LANG_* name. Kept as found.
  'fil': ['0464', '04e4', '100', 'SUBLANG_DEFAULT'],
  'fr': ['040c', '04e4', 'LANG_FRENCH', 'SUBLANG_FRENCH'],
  'fr-CA': ['0c0c', '04e4', 'LANG_FRENCH', 'SUBLANG_FRENCH_CANADIAN'],
  'gl': ['0456', '04e4', 'LANG_GALICIAN', 'SUBLANG_DEFAULT'],
  # No codepage for Gujarati, use unicode(1200).
  'gu': ['0447', '04b0', 'LANG_GUJARATI', 'SUBLANG_DEFAULT'],
  'he': ['040d', '04e7', 'LANG_HEBREW', 'SUBLANG_DEFAULT'],
  # No codepage for Hindi, use unicode(1200).
  'hi': ['0439', '04b0', 'LANG_HINDI', 'SUBLANG_DEFAULT'],
  'hr': ['041a', '04e4', 'LANG_CROATIAN', 'SUBLANG_DEFAULT'],
  'hu': ['040e', '04e2', 'LANG_HUNGARIAN', 'SUBLANG_DEFAULT'],
  'id': ['0421', '04e4', 'LANG_INDONESIAN', 'SUBLANG_DEFAULT'],
  'is': ['040f', '04e4', 'LANG_ICELANDIC', 'SUBLANG_DEFAULT'],
  'it': ['0410', '04e4', 'LANG_ITALIAN', 'SUBLANG_DEFAULT'],
  'iw': ['040d', '04e7', 'LANG_HEBREW', 'SUBLANG_DEFAULT'],
  'ja': ['0411', '03a4', 'LANG_JAPANESE', 'SUBLANG_DEFAULT'],
  # No codepage for Kannada, use unicode(1200).
  'kn': ['044b', '04b0', 'LANG_KANNADA', 'SUBLANG_DEFAULT'],
  'ko': ['0412', '03b5', 'LANG_KOREAN', 'SUBLANG_KOREAN'],
  'lt': ['0427', '04e9', 'LANG_LITHUANIAN', 'SUBLANG_LITHUANIAN'],
  'lv': ['0426', '04e9', 'LANG_LATVIAN', 'SUBLANG_DEFAULT'],
  # No codepage for Malayalam, use unicode(1200).
  'ml': ['044c', '04b0', 'LANG_MALAYALAM', 'SUBLANG_DEFAULT'],
  # No codepage for Marathi, use unicode(1200).
  'mr': ['044e', '04b0', 'LANG_MARATHI', 'SUBLANG_DEFAULT'],
  # Malay (Malaysia) [ms-MY]
  'ms': ['043e', '04e4', 'LANG_MALAY', 'SUBLANG_DEFAULT'],
  'nb': ['0414', '04e4', 'LANG_NORWEGIAN', 'SUBLANG_DEFAULT'],
  'ne': ['0461', '04b0', 'LANG_NEPALI', 'SUBLANG_NEPALI_NEPAL'],
  'nl': ['0413', '04e4', 'LANG_DUTCH', 'SUBLANG_DEFAULT'],
  'no': ['0414', '04e4', 'LANG_NORWEGIAN', 'SUBLANG_DEFAULT'],
  # No codepage for Oriya, use unicode(1200).
  'or': ['0448', '04b0', 'LANG_ORIYA', 'SUBLANG_DEFAULT'],
  'pa': ['0446', '04b0', 'LANG_PUNJABI', 'SUBLANG_PUNJABI_INDIA'],
  'pl': ['0415', '04e2', 'LANG_POLISH', 'SUBLANG_DEFAULT'],
  'pt-BR': ['0416', '04e4', 'LANG_PORTUGUESE', 'SUBLANG_DEFAULT'],
  'pt-PT': ['0816', '04e4', 'LANG_PORTUGUESE', 'SUBLANG_PORTUGUESE'],
  'ro': ['0418', '04e2', 'LANG_ROMANIAN', 'SUBLANG_DEFAULT'],
  'ru': ['0419', '04e3', 'LANG_RUSSIAN', 'SUBLANG_DEFAULT'],
  'sa': ['044f', '04b0', 'LANG_SANSKRIT', 'SUBLANG_SANSKRIT_INDIA'],
  'si': ['045b', '04b0', 'LANG_SINHALESE', 'SUBLANG_SINHALESE_SRI_LANKA'],
  'sk': ['041b', '04e2', 'LANG_SLOVAK', 'SUBLANG_DEFAULT'],
  'sl': ['0424', '04e2', 'LANG_SLOVENIAN', 'SUBLANG_DEFAULT'],
  'sr': ['081a', '04e2', 'LANG_SERBIAN', 'SUBLANG_SERBIAN_CYRILLIC'],
  'sv': ['041d', '04e4', 'LANG_SWEDISH', 'SUBLANG_SWEDISH'],
  'sw': ['0441', '04e4', 'LANG_SWAHILI', 'SUBLANG_DEFAULT'],
  # No codepage for Tamil, use unicode(1200).
  'ta': ['0449', '04b0', 'LANG_TAMIL', 'SUBLANG_DEFAULT'],
  # No codepage for Telugu, use unicode(1200).
  'te': ['044a', '04b0', 'LANG_TELUGU', 'SUBLANG_DEFAULT'],
  'th': ['041e', '036a', 'LANG_THAI', 'SUBLANG_DEFAULT'],
  'ti': ['0873', '04b0', 'LANG_TIGRIGNA', 'SUBLANG_TIGRIGNA_ERITREA'],
  'tr': ['041f', '04e6', 'LANG_TURKISH', 'SUBLANG_DEFAULT'],
  'uk': ['0422', '04e3', 'LANG_UKRAINIAN', 'SUBLANG_DEFAULT'],
  # No codepage for Urdu, use unicode(1200).
  'ur': ['0420', '04b0', 'LANG_URDU', 'SUBLANG_DEFAULT'],
  'vi': ['042a', '04ea', 'LANG_VIETNAMESE', 'SUBLANG_DEFAULT'],
  'zh-CN': ['0804', '03a8', 'LANG_CHINESE', 'SUBLANG_CHINESE_SIMPLIFIED'],
  'zh-HK': ['0c04', '03b6', 'LANG_CHINESE', 'SUBLANG_CHINESE_HONGKONG'],
  'zh-TW': ['0404', '03b6', 'LANG_CHINESE', 'SUBLANG_CHINESE_TRADITIONAL'],
  # No codepage for Zulu, use unicode(1200).
  'zu': ['0435', '04b0', 'LANG_ZULU', 'SUBLANG_DEFAULT'],
}
| 150 | |
# Right-To-Left languages.
#
# Hebrew is listed under both its current code ('he') and the legacy code
# ('iw'): the language table in this file defines both with identical values,
# so both must be reported as right-to-left.
_RTL_LANGUAGES = (
  'ar',  # Arabic
  'fa',  # Farsi
  'he',  # Hebrew
  'iw',  # Hebrew (legacy code)
  'ks',  # Kashmiri
  'ku',  # Kurdish
  'ps',  # Pashto
  'ur',  # Urdu
  'yi',  # Yiddish
)
| 162 | |
| 163 | |
def GetCodepage(language):
  """Looks up the codepage of |language|.

  Returns the second item of the language's _LANGUAGE_LANGID entry, a
  hexadecimal string. Raises KeyError for a language missing from the table.
  """
  return _LANGUAGE_LANGID[language][1]
| 168 | |
| 169 | |
def GetCodepageDecimal(language):
  """Looks up the codepage of |language| and returns it as a decimal string.

  The table stores the codepage in hexadecimal; it is parsed as base 16 and
  formatted back in base 10. Raises KeyError for a language missing from the
  table.
  """
  hex_codepage = _LANGUAGE_LANGID[language][1]
  return '%d' % int(hex_codepage, 16)
| 174 | |
| 175 | |
def GetLangId(language):
  """Looks up the language id of |language|.

  Returns the first item of the language's _LANGUAGE_LANGID entry, a
  hexadecimal string. Raises KeyError for a language missing from the table.
  """
  return _LANGUAGE_LANGID[language][0]
| 180 | |
| 181 | |
def GetPrimaryLanguage(language):
  """Looks up the primary language ID of |language|.

  Returns the third item of the language's _LANGUAGE_LANGID entry. Raises
  KeyError for a language missing from the table.
  """
  return _LANGUAGE_LANGID[language][2]
| 186 | |
| 187 | |
def GetSublanguage(language):
  """Looks up the sublanguage ID of |language|.

  Returns the fourth item of the language's _LANGUAGE_LANGID entry. Raises
  KeyError for a language missing from the table.
  """
  return _LANGUAGE_LANGID[language][3]
| 192 | |
| 193 | |
def IsRtlLanguage(language):
  """Returns True if |language| is listed in _RTL_LANGUAGES."""
  is_rtl = language in _RTL_LANGUAGES
  return is_rtl
| 196 | |
| 197 | |
def NormalizeLanguageCode(language):
  """Returns |language| with its first underscore turned into a dash.

  Only the first underscore is converted; any later ones are left untouched.
  """
  prefix, underscore, suffix = language.partition('_')
  if not underscore:
    # Nothing to convert.
    return language
  return prefix + '-' + suffix
| 200 | |
| 201 | |
def ReadValuesFromFile(values_dict, file_name):
  """Reads KEYWORD=VALUE settings from |file_name| into |values_dict|.

  Everything to the left of the first '=' is the keyword, everything to the
  right is the value. Only the trailing line ending is removed; no other
  white space is stripped, so beware.

  Args:
    values_dict: dictionary updated in place; existing keys are overwritten.
    file_name: path of the file to read.

  Raises:
    IOError: from open() if the file cannot be opened.
    ValueError: if a line (including an empty one) contains no '='.
  """
  # 'with' closes the file even when a malformed line raises.
  with open(file_name, 'r') as values_file:
    for line in values_file:
      key, val = line.rstrip('\r\n').split('=', 1)
      values_dict[key] = val
| 215 | |
| 216 | |
def ReadMessagesFromFile(file_name):
  """Reads messages from a 'chrome_messages_json' file.

  The file is decoded as UTF-8 with an optional byte order mark and must hold
  a JSON object whose values each have a 'message' member.

  Args:
    file_name: path of the messages file.

  Returns:
    A dictionary mapping every key of the JSON object to the text of its
    'message' member.

  Raises:
    IOError: from io.open() if the file cannot be opened.
    ValueError: if the file is not valid JSON.
    KeyError: if an entry has no 'message' member.
  """
  # 'unicode' only exists on Python 2; fall back to 'str' elsewhere.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  # 'with' closes the file even when json.load() raises.
  with io.open(file_name, encoding='utf-8-sig') as messages_file:
    messages = json.load(messages_file)

  return dict((key, text_type(entry['message']))
              for key, entry in messages.items())
| 231 | |
| 232 | |
def WriteIfChanged(file_name, contents, encoding='utf-16'):
  """Writes |contents| to |file_name| unless the file already holds them.

  The existing file is decoded with the same |encoding| that is used for
  writing, so an up-to-date file is recognised and left untouched (keeping
  its modification time).

  Args:
    file_name: path of the file to write.
    contents: the text to store.
    encoding: encoding used both to read the old file and to write the new
        one.
  """
  try:
    with io.open(file_name, 'r', encoding=encoding) as target:
      old_contents = target.read()
  except EnvironmentError:
    # No readable previous version (typically the file does not exist yet):
    # fall through and create it.
    pass
  except UnicodeError:
    # The existing file is not valid |encoding|; discard it.
    os.unlink(file_name)
  else:
    if contents == old_contents:
      return
    os.unlink(file_name)

  # 'with' guarantees the new contents are flushed and the handle is closed.
  with io.open(file_name, 'w', encoding=encoding) as target:
    target.write(contents)
| 252 | |
| 253 | |
class MessageMap:
  """Holds the localized messages of every language and the active language."""

  def __init__(self, languages, messages_path):
    """Loads '<messages_path>/<language>/messages.json' for each language.

    The directory name is the language code with its first dash replaced by
    an underscore. Nothing is loaded when |messages_path| is empty.
    """
    self.language = None
    self.message_map = {}

    if not messages_path:
      return

    for language in languages:
      locale_dir = language.replace('-', '_', 1)
      json_path = os.path.join(messages_path, locale_dir, 'messages.json')
      self.message_map[language] = ReadMessagesFromFile(json_path)

  def GetText(self, message):
    """Looks up |message| in the table of the currently selected language."""
    translations = self.message_map[self.language]
    return translations[message]

  def SelectLanguage(self, language):
    """Makes |language| the one used by subsequent GetText() calls."""
    self.language = language

  def MakeSelectLanguage(self):
    """Builds a plain one-argument function forwarding to SelectLanguage()."""
    def select_language(language):
      return self.SelectLanguage(language)
    return select_language

  def MakeGetText(self):
    """Builds a plain one-argument function forwarding to GetText()."""
    def get_text(message):
      return self.GetText(message)
    return get_text
| 283 | |
| 284 | |
def Localize(source, target, options):
  """Renders the jinja2 template |source| into the file |target|.

  Args:
    source: path of the template; its directory becomes the search path of
        the jinja2 loader.
    target: path of the output file, written through WriteIfChanged().
    options: the parsed command line options. The attributes read here are
        jinja2, languages, messages_path, define, input and encoding.

  Returns:
    0 on success.

  Raises:
    ValueError: if a '-d' define contains no '='.
  """
  # Load jinja2 library.
  if options.jinja2:
    jinja2_path = os.path.normpath(options.jinja2)
  else:
    # abspath(__file__) names this script itself, so the first '..' only
    # cancels the file name.
    jinja2_path = os.path.normpath(os.path.join(os.path.abspath(__file__),
                                                '../../../third_party/jinja2'))
  jinja2_parent = os.path.split(jinja2_path)[0]
  if jinja2_parent not in sys.path:
    sys.path.append(jinja2_parent)
  from jinja2 import Environment, FileSystemLoader

  # Create jinja2 environment.
  (template_path, template_name) = os.path.split(source)
  env = Environment(loader=FileSystemLoader(template_path),
                    extensions=['jinja2.ext.do', 'jinja2.ext.i18n'])

  # Register custom filters and globals.
  env.filters['GetCodepage'] = GetCodepage
  env.filters['GetCodepageDecimal'] = GetCodepageDecimal
  env.filters['GetLangId'] = GetLangId
  env.filters['GetPrimaryLanguage'] = GetPrimaryLanguage
  env.filters['GetSublanguage'] = GetSublanguage
  env.globals['IsRtlLanguage'] = IsRtlLanguage

  # Set the list of languages to use. Build a real list: the languages are
  # iterated both by MessageMap below and by the template, and map() would
  # hand out a one-shot iterator on Python 3.
  languages = [NormalizeLanguageCode(code) for code in options.languages]
  context = {'languages': languages}

  # Load the localized messages and register the message map with jinja2.i18n
  # extension. The same callable is installed for both gettext and ngettext.
  message_map = MessageMap(languages, options.messages_path)
  env.globals['SelectLanguage'] = message_map.MakeSelectLanguage()
  env.install_gettext_callables(message_map.MakeGetText(),
                                message_map.MakeGetText())

  # Add OFFICIAL_BUILD variable the same way chrome/tools/build/version.py
  # does.
  if os.environ.get('CHROME_BUILD_TYPE') == '_official':
    context['official_build'] = '1'
  else:
    context['official_build'] = '0'

  # Add all variables defined in the command line.
  for define in options.define or []:
    key, value = define.split('=', 1)
    context[key] = value

  # Read KEYWORD=VALUE variables from file.
  for file_name in options.input or []:
    ReadValuesFromFile(context, file_name)

  template = env.get_template(template_name)
  WriteIfChanged(target, template.render(context), options.encoding)
  return 0
| 339 | |
| 340 | |
def main():
  """Parses the command line and runs Localize().

  Exits through parser.error() unless exactly two positional arguments, at
  least one language and a messages path are given. Otherwise returns the
  result of Localize().
  """
  parser = OptionParser(usage="Usage: localize [options] <input> <output>")

  # (flags, add_option keyword arguments) for every supported option.
  option_table = (
      (('-d', '--define'),
       dict(dest='define', action='append', type='string',
            help='define a variable (VAR=VALUE).')),
      (('-i', '--input'),
       dict(dest='input', action='append', type='string',
            help='read variables from INPUT.')),
      (('-l', '--language'),
       dict(dest='languages', action='append', type='string',
            help='add LANGUAGE to the list of languages to use.')),
      (('--encoding',),
       dict(dest='encoding', type='string', default='utf-16',
            help="set the encoding of <output>. 'utf-16' is the default.")),
      (('--jinja2',),
       dict(dest='jinja2', type='string',
            help="specifies path to the jinja2 library.")),
      (('--messages_path',),
       dict(dest='messages_path', type='string',
            help="set path to localized messages.")),
  )
  for flags, settings in option_table:
    parser.add_option(*flags, **settings)

  options, positional = parser.parse_args()

  # parser.error() prints the message and exits, so the checks run in order.
  if len(positional) != 2:
    parser.error('Two positional arguments (<input> and <output>) are expected')
  if not options.languages:
    parser.error('At least one language must be specified')
  if not options.messages_path:
    parser.error('--messages_path is required')

  template_file, output_file = positional
  return Localize(template_file, output_file, options)


if __name__ == '__main__':
  sys.exit(main())
| 375 | |
| OLD | NEW |