OLD | NEW |
| (Empty) |
1 #!/usr/bin/env python | |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 '''The 'grit rc2grd' tool.''' | |
7 | |
8 | |
9 import os.path | |
10 import getopt | |
11 import re | |
12 import StringIO | |
13 import types | |
14 | |
15 import grit.node.empty | |
16 from grit.node import include | |
17 from grit.node import structure | |
18 from grit.node import message | |
19 | |
20 from grit.gather import rc | |
21 from grit.gather import tr_html | |
22 | |
23 from grit.tool import interface | |
24 from grit.tool import postprocess_interface | |
25 from grit.tool import preprocess_interface | |
26 | |
27 from grit import grd_reader | |
28 from grit import lazy_re | |
29 from grit import tclib | |
30 from grit import util | |
31 | |
32 | |
# NOTE: all patterns below are written as raw strings so that regex escapes
# such as \s, \A or \$ reach the regex engine verbatim instead of relying on
# Python leaving unrecognised string escapes untouched.

# Matches files referenced from an .rc file, i.e. lines of the form
#   ID  TYPE  "filename"
# Groups: 'id', 'type' and 'file' (the still RC-escaped filename).
_FILE_REF = lazy_re.compile(r'''
  ^(?P<id>[A-Z_0-9.]+)[ \t]+
  (?P<type>[A-Z_0-9]+)[ \t]+
  "(?P<file>.*?([^"]|""))"[ \t]*$''', re.VERBOSE | re.MULTILINE)


# Matches a dialog section (DIALOG or DIALOGEX), from its ID line through the
# first END line that follows its BEGIN line.  Group 'id' is the resource ID.
_DIALOG = lazy_re.compile(
    r'^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s*$.+?^END\s*$',
    re.MULTILINE | re.DOTALL)


# Matches a menu section, from its ID line through the first END line that
# follows its BEGIN line.  Group 'id' is the resource ID.
_MENU = lazy_re.compile(
    r'^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s*$.+?^END\s*$',
    re.MULTILINE | re.DOTALL)


# Matches a versioninfo section, from its ID line through the first END line
# that follows its BEGIN line.  Group 'id' is the resource ID.
_VERSIONINFO = lazy_re.compile(
    r'^(?P<id>[A-Z0-9_]+)\s+VERSIONINFO\s.+?^BEGIN\s*$.+?^END\s*$',
    re.MULTILINE | re.DOTALL)


# Matches a stringtable, including any optional attributes following the
# STRINGTABLE keyword.  Group 'body' is the text between BEGIN and END.
_STRING_TABLE = lazy_re.compile(
    (r'^STRINGTABLE(\s+(PRELOAD|DISCARDABLE|CHARACTERISTICS.+|LANGUAGE.+|'
     r'VERSION.+))*\s*\nBEGIN\s*$(?P<body>.+?)^END\s*$'),
    re.MULTILINE | re.DOTALL)


# Matches each message inside a stringtable, breaking it up into comments,
# the ID of the message, and the (RC-escaped) message text.
_MESSAGE = lazy_re.compile(r'''
  (?P<comment>(^\s+//.+?)*)  # 0 or more lines of comments preceding the message
  ^\s*
  (?P<id>[A-Za-z0-9_]+)  # id
  \s+
  "(?P<text>.*?([^"]|""))"([^"]|$)  # The message itself
  ''', re.MULTILINE | re.DOTALL | re.VERBOSE)


# Matches each line of comment text in a multi-line comment.  Group 'text' is
# the comment with the leading '//' and surrounding whitespace removed.
_COMMENT_TEXT = lazy_re.compile(r'^\s*//\s*(?P<text>.+?)$', re.MULTILINE)


# Matches a string that is empty or all whitespace
_WHITESPACE_ONLY = lazy_re.compile(r'\A\s*\Z', re.MULTILINE)


# Finds printf and FormatMessage style format specifiers
# Uses non-capturing groups except for the outermost group, so the output of
# re.split() should include both the normal text and what we intend to
# replace with placeholders.
# TODO(joi) Check documentation for printf (and Windows variants) and
# FormatMessage
_FORMAT_SPECIFIER = lazy_re.compile(
    r'(%[-# +]?(?:[0-9]*|\*)(?:\.(?:[0-9]+|\*))?(?:h|l|L)?'  # printf up to last char
    r'(?:d|i|o|u|x|X|e|E|f|F|g|G|c|r|s|ls|ws)'               # printf last char
    r'|\$[1-9][0-9]*)')                                      # FormatMessage
92 | |
93 | |
class Rc2Grd(interface.Tool):
  '''A tool for converting .rc files to .grd files. This tool is only for
converting the source (nontranslated) .rc file to a .grd file. For importing
existing translations, use the rc2xtb tool.

Usage: grit [global options] rc2grd [OPTIONS] RCFILE

The tool takes a single argument, which is the path to the .rc file to convert.
It outputs a .grd file with the same name in the same directory as the .rc file.
The .grd file may have one or more TODO comments for things that have to be
cleaned up manually.

OPTIONS may be any of the following:

  -e ENCODING    Specify the ENCODING of the .rc file. Default is 'cp1252'.

  -h TYPE        Specify the TYPE attribute for HTML structures.
                 Default is 'tr_html'.

  -u ENCODING    Specify the ENCODING of HTML files. Default is 'utf-8'.

  -n MATCH       Specify the regular expression to match in comments that will
                 indicate that the resource the comment belongs to is not
                 translateable. Default is 'Not locali(s|z)able'.

  -r GRDFILE     Specify that GRDFILE should be used as a "role model" for
                 any placeholders that otherwise would have had TODO names.
                 This attempts to find an identical message in the GRDFILE
                 and uses that instead of the automatically placeholderized
                 message.

  --pre CLASS    Specify an optional, fully qualified classname, which
                 has to be a subclass of grit.tool.PreProcessor, to
                 run on the text of the RC file before conversion occurs.
                 This can be used to support constructs in the RC files
                 that GRIT cannot handle on its own.

  --post CLASS   Specify an optional, fully qualified classname, which
                 has to be a subclass of grit.tool.PostProcessor, to
                 run on the text of the converted RC file.
                 This can be used to alter the content of the RC file
                 based on the conversion that occurred.

For menus, dialogs and version info, the .grd file will refer to the original
.rc file. Once conversion is complete, you can strip the original .rc file
of its string table and all comments as these will be available in the .grd
file.

Note that this tool WILL NOT obey C preprocessor rules, so even if something
is #if 0-ed out it will still be included in the output of this tool.
Therefore, if your .rc file contains sections like this, you should run the
C preprocessor on the .rc file or manually edit it before using this tool.
'''

  def ShortDescription(self):
    '''Returns a one-line description of this tool.'''
    return 'A tool for converting .rc source files to .grd files.'

  def __init__(self):
    '''Sets every tool option to its documented default.'''
    self.input_encoding = 'cp1252'   # -e
    self.html_type = 'tr_html'       # -h
    self.html_encoding = 'utf-8'     # -u
    self.not_localizable_re = re.compile('Not locali(s|z)able')  # -n
    self.role_model = None           # -r (parsed .grd tree, or None)
    self.pre_process = None          # --pre (fully qualified class name)
    self.post_process = None         # --post (fully qualified class name)

  def ParseOptions(self, args):
    '''Given a list of arguments, set this object's options and return
    all non-option arguments.

    Args:
      args: list of command-line argument strings for this tool

    Return:
      The list of arguments remaining after the options were consumed.
    '''
    # Every short option takes a value, so each one needs a trailing ':'.
    # ('-r' previously lacked it, so its GRDFILE value was never consumed.)
    (own_opts, args) = getopt.getopt(args, 'e:h:u:n:r:', ['pre=', 'post='])
    for (key, val) in own_opts:
      if key == '-e':
        self.input_encoding = val
      elif key == '-h':
        self.html_type = val
      elif key == '-u':
        self.html_encoding = val
      elif key == '-n':
        self.not_localizable_re = re.compile(val)
      elif key == '-r':
        self.role_model = grd_reader.Parse(val)
      elif key == '--pre':
        self.pre_process = val
      elif key == '--post':
        self.post_process = val
    return args

  def Run(self, opts, args):
    '''Converts the .rc file named in 'args' and writes the .grd file next
    to it (same base name, '.grd' extension).

    Args:
      opts: global options, passed through to self.SetOptions()
      args: tool-specific arguments (options followed by the .rc path)

    Return:
      2 if the tool-specific arguments do not name exactly one file,
      otherwise None.
    '''
    args = self.ParseOptions(args)
    if len(args) != 1:
      print('This tool takes a single tool-specific argument, the path to the\n'
            '.rc file to process.')
      return 2
    self.SetOptions(opts)

    path = args[0]
    grd_name = os.path.splitext(os.path.basename(path))[0] + '.grd'
    out_path = os.path.join(util.dirname(path), grd_name)

    rctext = util.ReadFile(path, self.input_encoding)
    grd_text = unicode(self.Process(rctext, path))
    with util.WrapOutputStream(open(out_path, 'w'), 'utf-8') as outfile:
      outfile.write(grd_text)

    print('Wrote output file %s.\nPlease check for TODO items in the file.' %
          out_path)

  def Process(self, rctext, rc_path):
    '''Processes 'rctext' and returns a resource tree corresponding to it.

    Args:
      rctext: complete text of the rc file
      rc_path: 'resource\\resource.rc'

    Return:
      grit.node.base.Node subclass
    '''
    if self.pre_process:
      preprocess_class = util.NewClassInstance(
          self.pre_process, preprocess_interface.PreProcessor)
      if preprocess_class:
        rctext = preprocess_class.Process(rctext, rc_path)
      else:
        self.Out(
            'PreProcessing class could not be found. Skipping preprocessing.\n')

    # Start with a basic skeleton for the .grd file
    root = grd_reader.Parse(StringIO.StringIO(
        '''<?xml version="1.0" encoding="UTF-8"?>
        <grit base_dir="." latest_public_release="0"
            current_release="1" source_lang_id="en">
          <outputs />
          <translations />
          <release seq="1">
            <includes />
            <structures />
            <messages />
          </release>
        </grit>'''), util.dirname(rc_path))
    # children[2] is the <release> element of the skeleton above; its three
    # children are the (still empty) containers we are about to fill.
    release = root.children[2]
    includes = release.children[0]
    structures = release.children[1]
    messages = release.children[2]
    assert (isinstance(includes, grit.node.empty.IncludesNode) and
            isinstance(structures, grit.node.empty.StructuresNode) and
            isinstance(messages, grit.node.empty.MessagesNode))

    self.AddIncludes(rctext, includes)
    self.AddStructures(rctext, structures, os.path.basename(rc_path))
    self.AddMessages(rctext, messages)

    self.VerboseOut('Validating that all IDs are unique...\n')
    root.ValidateUniqueIds()
    self.ExtraVerboseOut('Done validating that all IDs are unique.\n')

    if self.post_process:
      postprocess_class = util.NewClassInstance(
          self.post_process, postprocess_interface.PostProcessor)
      if postprocess_class:
        root = postprocess_class.Process(rctext, rc_path, root)
      else:
        self.Out(
            'PostProcessing class could not be found. '
            'Skipping postprocessing.\n')

    return root

  def IsHtml(self, res_type, fname):
    '''Check whether both the type and file extension indicate HTML

    Args:
      res_type: resource type string from the .rc file (e.g. 'HTML')
      fname: filename of the referenced resource

    Return:
      True only if res_type is 'HTML' and the text after the last '.' in
      fname is 'htm' or 'html' (case-insensitive).
    '''
    fext = fname.split('.')[-1].lower()
    return res_type == 'HTML' and fext in ('htm', 'html')

  def AddIncludes(self, rctext, node):
    '''Scans 'rctext' for included resources (e.g. BITMAP, ICON) and
    adds each included resource as an <include> child node of 'node'.
    HTML files are skipped here; AddStructures() handles them.'''
    for m in _FILE_REF.finditer(rctext):
      res_id = m.group('id')
      res_type = m.group('type').upper()
      fname = rc.Section.UnEscape(m.group('file'))
      assert fname.find('\n') == -1
      if not self.IsHtml(res_type, fname):
        self.VerboseOut('Processing %s with ID %s (filename: %s)\n' %
                        (res_type, res_id, fname))
        node.AddChild(
            include.IncludeNode.Construct(node, res_id, res_type, fname))

  def AddStructures(self, rctext, node, rc_filename):
    '''Scans 'rctext' for structured resources (e.g. menus, dialogs, version
    information resources and HTML templates) and adds each as a <structure>
    child of 'node'.

    Args:
      rctext: complete text of the rc file
      node: the node that receives the new <structure> children
      rc_filename: file name recorded on menu/dialog/version structures
    '''
    # First add HTML includes
    for m in _FILE_REF.finditer(rctext):
      res_id = m.group('id')
      res_type = m.group('type').upper()
      fname = rc.Section.UnEscape(m.group('file'))
      # Must test the resource type parsed from the .rc file; passing the
      # builtin 'type' here made this check always fail.
      if self.IsHtml(res_type, fname):
        node.AddChild(structure.StructureNode.Construct(
            node, res_id, self.html_type, fname, self.html_encoding))

    # Then add all RC includes
    def AddStructure(res_type, res_id):
      self.VerboseOut('Processing %s with ID %s\n' % (res_type, res_id))
      node.AddChild(structure.StructureNode.Construct(
          node, res_id, res_type, rc_filename,
          encoding=self.input_encoding))

    for m in _MENU.finditer(rctext):
      AddStructure('menu', m.group('id'))
    for m in _DIALOG.finditer(rctext):
      AddStructure('dialog', m.group('id'))
    for m in _VERSIONINFO.finditer(rctext):
      AddStructure('version', m.group('id'))

  def AddMessages(self, rctext, node):
    '''Scans 'rctext' for all messages in string tables, preprocesses them as
    much as possible for placeholders (e.g. messages containing $1, $2 or %s, %d
    type format specifiers get those specifiers replaced with placeholders, and
    HTML-formatted messages get run through the HTML-placeholderizer). Adds
    each message as a <message> node child of 'node'.'''
    for tm in _STRING_TABLE.finditer(rctext):
      table = tm.group('body')
      for mm in _MESSAGE.finditer(table):
        # Join the '//' comment lines preceding the message into a single
        # description string.
        comment_block = mm.group('comment')
        comment_text = ' '.join(
            cm.group('text') for cm in _COMMENT_TEXT.finditer(comment_block))

        msg_id = mm.group('id')
        text = rc.Section.UnEscape(mm.group('text'))

        self.VerboseOut('Processing message %s (text: "%s")\n' %
                        (msg_id, text))

        msg_obj = self.Placeholderize(text)

        # Messages that contain only placeholders do not need translation.
        is_translateable = any(
            isinstance(item, types.StringTypes) and
            not _WHITESPACE_ONLY.match(item)
            for item in msg_obj.GetContent())

        if self.not_localizable_re.search(comment_text):
          is_translateable = False

        message_meaning = ''
        internal_comment = ''

        # If we have a "role model" (existing GRD file) and this node exists
        # in the role model, use the description, meaning and translateable
        # attributes from the role model.
        if self.role_model:
          role_node = self.role_model.GetNodeById(msg_id)
          if role_node:
            is_translateable = role_node.IsTranslateable()
            message_meaning = role_node.attrs['meaning']
            comment_text = role_node.attrs['desc']
            internal_comment = role_node.attrs['internal_comment']

        # For nontranslateable messages, we don't want the complexity of
        # placeholderizing everything.
        if not is_translateable:
          msg_obj = tclib.Message(text=text)

        msg_node = message.MessageNode.Construct(
            node, msg_obj, msg_id,
            desc=comment_text,
            translateable=is_translateable,
            meaning=message_meaning)
        msg_node.attrs['internal_comment'] = internal_comment

        node.AddChild(msg_node)
        self.ExtraVerboseOut('Done processing message %s\n' % msg_id)

  def Placeholderize(self, text):
    '''Creates a tclib.Message object from 'text', attempting to recognize
    a few different formats of text that can be automatically placeholderized
    (HTML code, printf-style format strings, and FormatMessage-style format
    strings).

    Args:
      text: the unescaped message text

    Return:
      A message object: the HTML-placeholderized message if that produced any
      placeholder, else a message whose format specifiers became placeholders
      named TODO_0001, TODO_0002, ... (or the role model's matching message,
      when a role model is set and one is found).
    '''
    try:
      # First try HTML placeholderizing.
      # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing
      msg = tr_html.HtmlToMessage(text, True)
      for item in msg.GetContent():
        if not isinstance(item, types.StringTypes):
          return msg  # Contained at least one placeholder, so we're done

      # HTML placeholderization didn't do anything, so try to find printf or
      # FormatMessage format specifiers and change them into placeholders.
      msg = tclib.Message()
      parts = _FORMAT_SPECIFIER.split(text)
      todo_counter = 1  # We make placeholder IDs 'TODO_0001' etc.
      for part in parts:
        if _FORMAT_SPECIFIER.match(part):
          msg.AppendPlaceholder(tclib.Placeholder(
              'TODO_%04d' % todo_counter, part, 'TODO'))
          todo_counter += 1
        elif part != '':
          msg.AppendText(part)

      if self.role_model and len(parts) > 1:  # there are TODO placeholders
        role_model_msg = self.role_model.UberClique().BestCliqueByOriginalText(
            msg.GetRealContent(), '')
        if role_model_msg:
          # replace wholesale to get placeholder names and examples
          msg = role_model_msg

      return msg
    except Exception:
      # Report which message failed, then let the error propagate unchanged.
      print('Exception processing message with text "%s"' % text)
      raise
OLD | NEW |