#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""The 'grit android2grd' tool."""
7 | |
8 | |
9 import getopt | |
10 import os.path | |
11 import StringIO | |
12 from xml.dom import Node | |
13 import xml.dom.minidom | |
14 | |
15 import grit.node.empty | |
16 from grit.node import io | |
17 from grit.node import message | |
18 | |
19 from grit.tool import interface | |
20 | |
21 from grit import grd_reader | |
22 from grit import lazy_re | |
23 from grit import tclib | |
24 from grit import util | |
25 | |
26 | |
# The name of a string in strings.xml: lowercase letters, digits and
# underscores only.
_STRING_NAME = lazy_re.compile(r'[a-z0-9_]+\Z')

# A string's character limit, written as [CHAR-LIMIT=n] in the comment that
# describes the string in strings.xml.
_CHAR_LIMIT = lazy_re.compile(r'\[CHAR-LIMIT=(\d+)\]')

# Finds String.Format() style format specifiers such as "%-5.2f".
# Every fragment is a raw string so that regex escapes such as \$ and \. are
# not (invalid) Python string escapes; the resulting pattern is unchanged.
_FORMAT_SPECIFIER = lazy_re.compile(
    r'%'
    r'([1-9][0-9]*\$|<)?'             # argument_index
    r'([-#+ 0,(]*)'                   # flags
    r'([0-9]+)?'                      # width
    r'(\.[0-9]+)?'                    # precision
    r'([bBhHsScCdoxXeEfgGaAtT%n])')   # conversion
41 | |
42 | |
class Android2Grd(interface.Tool):
  """Tool for converting Android strings.xml files into chrome Grd files.

  Usage: grit [global options] android2grd [OPTIONS] STRINGS_XML

  The Android2Grd tool will convert an Android strings.xml file (whose path is
  specified by STRINGS_XML) and create a chrome style grd file containing the
  relevant information.

  Because grd documents are much richer than strings.xml documents we
  supplement the information required by grds using OPTIONS with sensible
  defaults.

  OPTIONS may be any of the following:

    --name FILENAME          Specify the base FILENAME. This should be without
                             any file type suffix. By default
                             "chrome_android_strings" will be used.

    --languages LANGUAGES    Comma separated list of ISO language codes (e.g.
                             en-US, en-GB, ru, zh-CN). These codes will be used
                             to determine the names of resource and translations
                             files that will be declared by the output grd file.
                             Whitespace around each code is ignored.

    --grd-dir GRD_DIR        Specify where the resultant grd file
                             (FILENAME.grd) should be output. By default this
                             will be the present working directory.

    --header-dir HEADER_DIR  Specify the location of the directory where grit
                             generated C++ headers (whose name will be
                             FILENAME.h) will be placed. Use an empty string to
                             disable header generation. Default: empty.

    --rc-dir RC_DIR          Specify the directory where resource files will
                             be located relative to grit build's output
                             directory. Use an empty string to disable rc
                             generation. Default: empty.

    --xml-dir XML_DIR        Specify where to place localized strings.xml files
                             relative to grit build's output directory. For each
                             language xx a values-xx/strings.xml file will be
                             generated. Use an empty string to disable
                             strings.xml generation. Default: '.'.

    --xtb-dir XTB_DIR        Specify where the xtb files containing translations
                             will be located relative to the grd file. Default:
                             '.'.
  """

  # Names of the long command line flags (without the leading '--').
  _NAME_FLAG = 'name'
  _LANGUAGES_FLAG = 'languages'
  _GRD_DIR_FLAG = 'grd-dir'
  _RC_DIR_FLAG = 'rc-dir'
  _HEADER_DIR_FLAG = 'header-dir'
  _XTB_DIR_FLAG = 'xtb-dir'
  _XML_DIR_FLAG = 'xml-dir'

  def __init__(self):
    # Base name (no suffix) of the grd file and of the files it declares.
    self.name = 'chrome_android_strings'
    # Language codes for which outputs and translation files are declared.
    self.languages = []
    # Directory the grd file is written to.
    self.grd_dir = '.'
    # Directory for rc outputs; a false value disables rc outputs.
    self.rc_dir = None
    # Directory holding the xtb translation files.
    self.xtb_dir = '.'
    # Directory for localized strings.xml outputs; a false value disables them.
    self.xml_res_dir = '.'
    # Directory for the generated header; a false value disables the header.
    self.header_dir = None

  def ShortDescription(self):
    """Returns a short description of the Android2Grd tool.

    Overridden from grit.interface.Tool

    Returns:
      A string containing a short description of the android2grd tool.
    """
    return 'Converts Android string.xml files into Chrome grd files.'

  def ParseOptions(self, args):
    """Sets this object's options from args; returns the non-option arguments.

    Args:
      args: List of command line argument strings given to the tool.

    Returns:
      The list of arguments left over once the recognized flags are consumed.

    Raises:
      getopt.GetoptError: if a flag is not recognized or lacks its value.
    """
    flags = [
        Android2Grd._NAME_FLAG,
        Android2Grd._LANGUAGES_FLAG,
        Android2Grd._GRD_DIR_FLAG,
        Android2Grd._RC_DIR_FLAG,
        Android2Grd._HEADER_DIR_FLAG,
        Android2Grd._XTB_DIR_FLAG,
        Android2Grd._XML_DIR_FLAG,
    ]
    # The tool has no short options. getopt expects an empty string for that;
    # passing None makes it fail with a TypeError (instead of a GetoptError)
    # as soon as the user supplies any short option.
    (opts, args) = getopt.getopt(args, '', ['%s=' % o for o in flags])

    for key, val in opts:
      # Get rid of the preceding hyphens.
      k = key[2:]
      if k == Android2Grd._NAME_FLAG:
        self.name = val
      elif k == Android2Grd._LANGUAGES_FLAG:
        # Tolerate "en-US, en-GB" style lists and drop empty entries so that
        # no output is ever declared for a blank language code.
        codes = [code.strip() for code in val.split(',')]
        self.languages = [code for code in codes if code]
      elif k == Android2Grd._GRD_DIR_FLAG:
        self.grd_dir = val
      elif k == Android2Grd._RC_DIR_FLAG:
        self.rc_dir = val
      elif k == Android2Grd._HEADER_DIR_FLAG:
        self.header_dir = val
      elif k == Android2Grd._XTB_DIR_FLAG:
        self.xtb_dir = val
      elif k == Android2Grd._XML_DIR_FLAG:
        self.xml_res_dir = val
    return args

  def Run(self, opts, args):
    """Runs the Android2Grd tool.

    Inherited from grit.interface.Tool.

    Args:
      opts: Global options object; handed to SetOptions() unchanged.
      args: List of this tool's command line arguments: the flags understood
          by ParseOptions() plus exactly one path to a strings.xml file.

    Returns:
      2 if the number of non-flag arguments is not exactly one, otherwise
      None.
    """
    args = self.ParseOptions(args)
    if len(args) != 1:
      print('Tool requires one argument, the path to the Android '
            'strings.xml resource file to be converted.')
      return 2
    self.SetOptions(opts)

    android_path = args[0]

    # Read and parse the Android strings.xml file.
    with open(android_path) as android_file:
      android_dom = xml.dom.minidom.parse(android_file)

    # Do the hard work -- convert the Android dom to grd file contents.
    grd_dom = self.AndroidDomToGrdDom(android_dom)
    grd_string = unicode(grd_dom)

    # Write the grd string to a file in grd_dir.
    grd_filename = self.name + '.grd'
    grd_path = os.path.join(self.grd_dir, grd_filename)
    with open(grd_path, 'w') as grd_file:
      # The grd skeleton declares encoding="UTF-8". Encode explicitly: writing
      # the unicode object directly would use the implicit ASCII codec and
      # raise UnicodeEncodeError on the first non-ASCII character.
      grd_file.write(grd_string.encode('utf-8'))

  def AndroidDomToGrdDom(self, android_dom):
    """Converts a strings.xml DOM into a DOM representing the contents of
    a grd file.

    Args:
      android_dom: A xml.dom.Document containing the contents of the Android
          string.xml document.
    Returns:
      The DOM for the grd xml document produced by converting the Android DOM.
    """

    # Start with a basic skeleton for the .grd file.
    root = grd_reader.Parse(StringIO.StringIO(
      '''<?xml version="1.0" encoding="UTF-8"?>
         <grit base_dir="." latest_public_release="0"
             current_release="1" source_lang_id="en">
           <outputs />
           <translations />
           <release allow_pseudo="false" seq="1">
             <messages fallback_to_english="true" />
           </release>
         </grit>'''), dir='.')
    outputs = root.children[0]
    translations = root.children[1]
    messages = root.children[2].children[0]
    assert (isinstance(messages, grit.node.empty.MessagesNode) and
            isinstance(translations, grit.node.empty.TranslationsNode) and
            isinstance(outputs, grit.node.empty.OutputsNode))

    if self.header_dir:
      self.__CreateCppHeaderOutputNode(outputs, self.header_dir)
    for lang in self.languages:
      # Create an output element for each language.
      if self.rc_dir:
        self.__CreateRcOutputNode(outputs, lang, self.rc_dir)
      if self.xml_res_dir:
        self.__CreateAndroidXmlOutputNode(outputs, lang, self.xml_res_dir)
      # 'en' is the source language of the skeleton, so it gets no xtb file.
      if lang != 'en':
        self.__CreateFileNode(translations, lang)
    # Convert all the strings.xml strings into grd messages.
    self.__CreateMessageNodes(messages, android_dom.documentElement)

    return root

  def __CreateMessageNodes(self, messages, resources):
    """Creates the <message> elements and adds them as children of <messages>.

    Args:
      messages: the <messages> element in the grd dom.
      resources: the <resources> element in the strings.xml dom.
    """
    # <string> elements contain the definition of the resource.
    # The description of a <string> element is contained within the comment
    # node element immediately preceding the string element in question.
    description = ''
    for child in resources.childNodes:
      if child.nodeType == Node.COMMENT_NODE:
        # Remove leading/trailing whitespace; collapse consecutive whitespaces.
        description = ' '.join(child.data.split())
      elif child.nodeType == Node.ELEMENT_NODE:
        if child.tagName != 'string':
          print('Warning: ignoring unknown tag <%s>' % child.tagName)
        else:
          translatable = self.IsTranslatable(child)
          raw_name = child.getAttribute('name')
          if not _STRING_NAME.match(raw_name):
            # Only reported; the message is still converted below.
            print('Error: illegal string name: %s' % raw_name)
          grd_name = 'IDS_' + raw_name.upper()
          # Transform the <string> node contents into a tclib.Message, taking
          # care to handle whitespace transformations and escaped characters,
          # and converting <xliff:g> placeholders into <ph> placeholders.
          msg = self.CreateTclibMessage(child)
          msg_node = self.__CreateMessageNode(messages, grd_name, description,
                                              msg, translatable)
          messages.AddChild(msg_node)
          # Reset the description once a message has been parsed.
          description = ''

  def CreateTclibMessage(self, android_string):
    """Transforms a <string/> element from strings.xml into a tclib.Message.

    Interprets whitespace, quotes, and escaped characters in the android_string
    according to Android's formatting and styling rules for strings.  Also
    converts <xliff:g> placeholders into <ph> placeholders, e.g.:

      <xliff:g id="website" example="google.com">%s</xliff:g>
        becomes
      <ph name="website"><ex>google.com</ex>%s</ph>

    Args:
      android_string: the <string> DOM element to convert.

    Returns:
      The tclib.Message.
    """
    msg = tclib.Message()
    current_text = ''  # Accumulated text that hasn't yet been added to msg.
    nodes = android_string.childNodes

    for i, node in enumerate(nodes):
      # Handle text nodes.
      if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
        current_text += node.data

      # Handle <xliff:g> and other tags.
      elif node.nodeType == Node.ELEMENT_NODE:
        if node.tagName == 'xliff:g':
          # Elements have no 'data' attribute, so the offending tag is shown
          # via toxml(); calling node.data() here would raise AttributeError
          # and hide the real problem.
          assert node.hasAttribute('id'), 'missing id: ' + node.toxml()
          placeholder_id = node.getAttribute('id')
          placeholder_text = self.__FormatPlaceholderText(node)
          placeholder_example = node.getAttribute('example')
          if not placeholder_example:
            print('Info: placeholder does not contain an example: %s' %
                  node.toxml())
            placeholder_example = placeholder_id.upper()
          msg.AppendPlaceholder(tclib.Placeholder(placeholder_id,
              placeholder_text, placeholder_example))
        else:
          print('Warning: removing tag <%s> which must be inside a '
                'placeholder: %s' % (node.tagName, node.toxml()))
          msg.AppendText(self.__FormatPlaceholderText(node))

      # Handle other nodes.
      elif node.nodeType != Node.COMMENT_NODE:
        assert False, 'Unknown node type: %s' % node.nodeType

      # Flush the accumulated text just before an element, or at the very end,
      # so that text split up by comments is formatted as a single run.
      is_last_node = (i == len(nodes) - 1)
      if (current_text and
          (is_last_node or nodes[i + 1].nodeType == Node.ELEMENT_NODE)):
        # For messages containing just text and comments (no xml tags) Android
        # strips leading and trailing whitespace.  We mimic that behavior.
        if not msg.GetContent() and is_last_node:
          current_text = current_text.strip()
        msg.AppendText(self.__FormatAndroidString(current_text))
        current_text = ''

    return msg

  def __FormatAndroidString(self, android_string, inside_placeholder=False):
    r"""Returns android_string formatted for a .grd file.

      * Collapses consecutive whitespaces, except when inside double-quotes.
      * Replaces \\, \n, \t, \", \' with \, newline, tab, ", '.

    Args:
      android_string: the raw text taken from strings.xml.
      inside_placeholder: True if the text comes from inside a tag; this
          suppresses the warning about format specifiers.

    Returns:
      The formatted string.
    """
    backslash_map = {'\\' : '\\', 'n' : '\n', 't' : '\t', '"' : '"', "'" : "'"}
    is_quoted_section = False  # True when we're inside double quotes.
    is_backslash_sequence = False  # True after seeing an unescaped backslash.
    prev_char = ''
    output = []
    for c in android_string:
      if is_backslash_sequence:
        # Unescape \\, \n, \t, \", and \'.
        assert c in backslash_map, 'Illegal escape sequence: \\%s' % c
        output.append(backslash_map[c])
        is_backslash_sequence = False
      elif c == '\\':
        is_backslash_sequence = True
      elif c.isspace() and not is_quoted_section:
        # Turn whitespace into ' ' and collapse consecutive whitespaces.
        if not prev_char.isspace():
          output.append(' ')
      elif c == '"':
        # Unescaped double quotes only toggle quoting; they are not emitted.
        is_quoted_section = not is_quoted_section
      else:
        output.append(c)
      prev_char = c
    output = ''.join(output)

    if is_quoted_section:
      print('Warning: unbalanced quotes in string: %s' % android_string)

    if is_backslash_sequence:
      print('Warning: trailing backslash in string: %s' % android_string)

    # Check for format specifiers outside of placeholder tags.
    if not inside_placeholder:
      format_specifier = _FORMAT_SPECIFIER.search(output)
      if format_specifier:
        print('Warning: format specifiers are not inside a placeholder '
              '<xliff:g/> tag: %s' % output)

    return output

  def __FormatPlaceholderText(self, placeholder_node):
    """Returns the text inside of an <xliff:g> placeholder node.

    Text and CDATA children are concatenated and formatted, comments are
    skipped, and any other kind of child node trips an assertion.
    """
    text = []
    for childNode in placeholder_node.childNodes:
      if childNode.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
        text.append(childNode.data)
      elif childNode.nodeType != Node.COMMENT_NODE:
        assert False, 'Unknown node type in ' + placeholder_node.toxml()
    return self.__FormatAndroidString(''.join(text), inside_placeholder=True)

  def __CreateMessageNode(self, messages_node, grd_name, description, msg,
                          translatable):
    """Creates and initializes a <message> element.

    Message elements correspond to Android <string> elements in that they
    declare a string resource along with a programmatic id.

    Args:
      messages_node: the <messages> node that will be the parent.
      grd_name: the name (IDS_...) of the message.
      description: the description of the message; may be empty.
      msg: the tclib.Message holding the message contents.
      translatable: whether the message should be translated.

    Returns:
      The new message node. It is not added to messages_node here.
    """
    if not description:
      print('Warning: no description for %s' % grd_name)
    # Check that we actually fit within the character limit we've specified.
    match = _CHAR_LIMIT.search(description)
    if match:
      char_limit = int(match.group(1))
      msg_content = msg.GetRealContent()
      if len(msg_content) > char_limit:
        print('Warning: char-limit for %s is %d, but length is %d: %s' %
              (grd_name, char_limit, len(msg_content), msg_content))
    return message.MessageNode.Construct(parent=messages_node,
                                         name=grd_name,
                                         message=msg,
                                         desc=description,
                                         translateable=translatable)

  def __CreateFileNode(self, translations_node, lang):
    """Creates and initializes the <file> elements.

    File elements provide information on the location of translation files
    (xtbs)

    Args:
      translations_node: the <translations> node the file node is added to.
      lang: the language code of the translation file.

    Returns:
      The new file node, already added to translations_node.
    """
    xtb_file = os.path.normpath(os.path.join(
        self.xtb_dir, '%s_%s.xtb' % (self.name, lang)))
    fnode = io.FileNode()
    fnode.StartParsing(u'file', translations_node)
    fnode.HandleAttribute('path', xtb_file)
    fnode.HandleAttribute('lang', lang)
    fnode.EndParsing()
    translations_node.AddChild(fnode)
    return fnode

  def __CreateCppHeaderOutputNode(self, outputs_node, header_dir):
    """Creates the <output> element corresponding to the generated c header.

    Args:
      outputs_node: the <outputs> node the output node is added to.
      header_dir: the directory of the generated header.

    Returns:
      The new output node, already added to outputs_node.
    """
    header_file_name = os.path.join(header_dir, self.name + '.h')
    header_node = io.OutputNode()
    header_node.StartParsing(u'output', outputs_node)
    header_node.HandleAttribute('filename', header_file_name)
    header_node.HandleAttribute('type', 'rc_header')
    emit_node = io.EmitNode()
    emit_node.StartParsing(u'emit', header_node)
    emit_node.HandleAttribute('emit_type', 'prepend')
    emit_node.EndParsing()
    header_node.AddChild(emit_node)
    header_node.EndParsing()
    outputs_node.AddChild(header_node)
    return header_node

  def __CreateRcOutputNode(self, outputs_node, lang, rc_dir):
    """Creates the <output> element corresponding to various rc file output.

    Args:
      outputs_node: the <outputs> node the output node is added to.
      lang: the language code of the rc file.
      rc_dir: the directory of the rc file.

    Returns:
      The new output node, already added to outputs_node.
    """
    rc_file_name = self.name + '_' + lang + ".rc"
    rc_path = os.path.join(rc_dir, rc_file_name)
    node = io.OutputNode()
    node.StartParsing(u'output', outputs_node)
    node.HandleAttribute('filename', rc_path)
    node.HandleAttribute('lang', lang)
    node.HandleAttribute('type', 'rc_all')
    node.EndParsing()
    outputs_node.AddChild(node)
    return node

  def __CreateAndroidXmlOutputNode(self, outputs_node, locale, xml_res_dir):
    """Creates the <output> element for a localized Android strings.xml file.

    Args:
      outputs_node: the <outputs> node the output node is added to.
      locale: the language code (e.g. 'en-GB') of the strings.xml file.
      xml_res_dir: the directory containing the values-xx directories.

    Returns:
      The new output node, already added to outputs_node.
    """
    # Need to check to see if the locale has a region, e.g. the GB in en-GB.
    # When a locale has a region Android expects the region to be prefixed
    # with an 'r'. For example for en-GB Android expects a values-en-rGB
    # directory.  Also, Android expects nb, tl, in, iw, ji as the language
    # codes for Norwegian, Tagalog/Filipino, Indonesian, Hebrew, and Yiddish:
    # http://developer.android.com/reference/java/util/Locale.html
    if locale == 'es-419':
      android_locale = 'es-rUS'
    else:
      android_lang, _, region = locale.partition('-')
      lang_map = {'no': 'nb', 'fil': 'tl', 'id': 'in', 'he': 'iw', 'yi': 'ji'}
      android_lang = lang_map.get(android_lang, android_lang)
      android_locale = android_lang + ('-r' + region if region else '')
    # Plain 'en' goes into the unqualified 'values' directory.
    values = 'values-' + android_locale if android_locale != 'en' else 'values'
    xml_path = os.path.normpath(os.path.join(
        xml_res_dir, values, 'strings.xml'))

    node = io.OutputNode()
    node.StartParsing(u'output', outputs_node)
    node.HandleAttribute('filename', xml_path)
    node.HandleAttribute('lang', locale)
    node.HandleAttribute('type', 'android')
    node.EndParsing()
    outputs_node.AddChild(node)
    return node

  def IsTranslatable(self, android_string):
    """Determines if a <string> element is a candidate for translation.

    A <string> element is by default translatable unless otherwise marked.

    Args:
      android_string: the <string> DOM element to inspect.

    Returns:
      True if there is no 'translatable' attribute or if its value is 'true'
      (case-insensitively); False for any other value. Values other than
      'true' and 'false' also produce a warning.
    """
    if android_string.hasAttribute('translatable'):
      value = android_string.getAttribute('translatable').lower()
      if value not in ('true', 'false'):
        print('Warning: translatable attribute has invalid value: %s' % value)
      return value == 'true'
    else:
      return True
479 | |
OLD | NEW |