Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(176)

Side by Side Diff: grit/tool/rc2grd.py

Issue 7994004: Initial source commit to grit-i18n project. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/
Patch Set: Created 9 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « grit/tool/preprocess_unittest.py ('k') | grit/tool/rc2grd_unittest.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 #!/usr/bin/python2.4
2 # Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 '''The 'grit rc2grd' tool.'''
7
8
9 import os.path
10 import getopt
11 import re
12 import StringIO
13 import types
14
15 import grit.node.empty
16 from grit.node import include
17 from grit.node import structure
18 from grit.node import message
19
20 from grit.gather import rc
21 from grit.gather import tr_html
22
23 from grit.tool import interface
24 from grit.tool import postprocess_interface
25 from grit.tool import preprocess_interface
26
27 from grit import grd_reader
28 from grit import tclib
29 from grit import util
30
31
# Matches files referenced from an .rc file.  Groups: 'id', 'type' and
# 'file' (the text between the outer double quotes, still RC-escaped).
_FILE_REF = re.compile(r'''
  ^(?P<id>[A-Z_0-9.]+)[ \t]+
  (?P<type>[A-Z_0-9]+)[ \t]+
  "(?P<file>.*?([^"]|""))"[ \t]*$''', re.VERBOSE | re.MULTILINE)


# Matches a dialog section (DIALOG or DIALOGEX), from the line carrying the
# ID through the first line that consists of END.
_DIALOG = re.compile(
    r'^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s*$.+?^END\s*$',
    re.MULTILINE | re.DOTALL)


# Matches a menu section
_MENU = re.compile(r'^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s*$.+?^END\s*$',
                   re.MULTILINE | re.DOTALL)


# Matches a versioninfo section
_VERSIONINFO = re.compile(
    r'^(?P<id>[A-Z0-9_]+)\s+VERSIONINFO\s.+?^BEGIN\s*$.+?^END\s*$',
    re.MULTILINE | re.DOTALL)


# Matches a stringtable.  The text between the BEGIN and END lines is
# captured as 'body'.
_STRING_TABLE = re.compile(
    r'^STRINGTABLE'
    r'(\s+(PRELOAD|DISCARDABLE|CHARACTERISTICS.+|LANGUAGE.+|VERSION.+))*'
    r'\s*\nBEGIN\s*$(?P<body>.+?)^END\s*$',
    re.MULTILINE | re.DOTALL)


# Matches each message inside a stringtable, breaking it up into comments,
# the ID of the message, and the (RC-escaped) message text.
_MESSAGE = re.compile(r'''
  (?P<comment>(^\s+//.+?)*)  # 0 or more lines of comments preceding the message
  ^\s*
  (?P<id>[A-Za-z0-9_]+)  # id
  \s+
  "(?P<text>.*?([^"]|""))"([^"]|$)  # The message itself
  ''', re.MULTILINE | re.DOTALL | re.VERBOSE)


# Matches each line of comment text in a multi-line comment.
_COMMENT_TEXT = re.compile(r'^\s*//\s*(?P<text>.+?)$', re.MULTILINE)


# Matches a string that is empty or all whitespace
_WHITESPACE_ONLY = re.compile(r'\A\s*\Z', re.MULTILINE)


# Finds printf and FormatMessage style format specifiers
# Uses non-capturing groups except for the outermost group, so the output of
# re.split() should include both the normal text and what we intend to
# replace with placeholders.
# TODO(joi) Check documentation for printf (and Windows variants) and
# FormatMessage
_FORMAT_SPECIFIER = re.compile(
    r'(%[-# +]?(?:[0-9]*|\*)(?:\.(?:[0-9]+|\*))?(?:h|l|L)?'  # printf up to last char
    r'(?:d|i|o|u|x|X|e|E|f|F|g|G|c|r|s|ls|ws)'  # printf last char
    r'|\$[1-9][0-9]*)')  # FormatMessage
87
88
89 class Rc2Grd(interface.Tool):
90 '''A tool for converting .rc files to .grd files. This tool is only for
91 converting the source (nontranslated) .rc file to a .grd file. For importing
92 existing translations, use the rc2xtb tool.
93
94 Usage: grit [global options] rc2grd [OPTIONS] RCFILE
95
96 The tool takes a single argument, which is the path to the .rc file to convert.
97 It outputs a .grd file with the same name in the same directory as the .rc file.
98 The .grd file may have one or more TODO comments for things that have to be
99 cleaned up manually.
100
101 OPTIONS may be any of the following:
102
103 -e ENCODING Specify the ENCODING of the .rc file. Default is 'cp1252'.
104
105 -h TYPE Specify the TYPE attribute for HTML structures.
106 Default is 'tr_html'.
107
108 -u ENCODING Specify the ENCODING of HTML files. Default is 'utf-8'.
109
110 -n MATCH Specify the regular expression to match in comments that will
111 indicate that the resource the comment belongs to is not
112 translatable. Default is 'Not locali(s|z)able'.
113
114 -r GRDFILE Specify that GRDFILE should be used as a "role model" for
115 any placeholders that otherwise would have had TODO names.
116 This attempts to find an identical message in the GRDFILE
117 and uses that instead of the automatically placeholderized
118 message.
119
120 --pre CLASS Specify an optional, fully qualified classname, which
121 has to be a subclass of grit.tool.PreProcessor, to
122 run on the text of the RC file before conversion occurs.
123 This can be used to support constructs in the RC files
124 that GRIT cannot handle on its own.
125
126 --post CLASS Specify an optional, fully qualified classname, which
127 has to be a subclass of grit.tool.PostProcessor, to
128 run on the text of the converted RC file.
129 This can be used to alter the content of the RC file
130 based on the conversion that occurred.
131
132 For menus, dialogs and version info, the .grd file will refer to the original
133 .rc file. Once conversion is complete, you can strip the original .rc file
134 of its string table and all comments as these will be available in the .grd
135 file.
136
137 Note that this tool WILL NOT obey C preprocessor rules, so even if something
138 is #if 0-ed out it will still be included in the output of this tool.
139 Therefore, if your .rc file contains sections like this, you should run the
140 C preprocessor on the .rc file or manually edit it before using this tool.
141 '''
142
def ShortDescription(self):
    '''Returns a one-sentence summary of what this tool does.'''
    summary = 'A tool for converting .rc source files to .grd files.'
    return summary
145
def __init__(self):
    '''Gives every tool option its default value; ParseOptions() overrides
    them from the command line.
    '''
    # Encodings and structure type used when no -e / -u / -h option is given.
    self.input_encoding = 'cp1252'
    self.html_encoding = 'utf-8'
    self.html_type = 'tr_html'

    # Searched for in the comment text that precedes a message; a hit marks
    # the message as not translateable.
    self.not_localizable_re = re.compile('Not locali(s|z)able')

    # Optional collaborators; None means the feature is switched off.
    self.role_model = None
    self.pre_process = None
    self.post_process = None
154
def ParseOptions(self, args):
    '''Given a list of arguments, set this object's options and return
    all non-option arguments.

    Args:
      args: list of tool-specific command-line arguments, e.g.
            ['-e', 'utf-8', 'resource.rc']

    Return:
      The list of arguments that remain once the options are consumed.

    Raises:
      getopt.GetoptError if an option is unknown or is missing its value.
    '''
    # Every short option takes a value, hence a ':' after each letter.  The
    # ':' after 'r' matters: without it getopt reports '-r' with an empty
    # value and leaves GRDFILE among the positional arguments.
    (own_opts, args) = getopt.getopt(args, 'e:h:u:n:r:', ['pre=', 'post='])
    for (key, val) in own_opts:
        if key == '-e':
            self.input_encoding = val
        elif key == '-h':
            self.html_type = val
        elif key == '-u':
            self.html_encoding = val
        elif key == '-n':
            self.not_localizable_re = re.compile(val)
        elif key == '-r':
            # The role model is parsed right away, so a bad GRDFILE fails here.
            self.role_model = grd_reader.Parse(val)
        elif key == '--pre':
            self.pre_process = val
        elif key == '--post':
            self.post_process = val
    return args
176
def Run(self, opts, args):
    '''Converts the .rc file named on the command line into a .grd file.

    The output is written next to the input, under the same base name with
    a '.grd' extension, encoded as UTF-8.

    Args:
      opts: global options, handed to self.SetOptions()
      args: tool-specific arguments; after option parsing exactly one must
            remain, the path to the .rc file

    Return:
      2 if the number of non-option arguments is not exactly one; otherwise
      None once the output file has been written.
    '''
    args = self.ParseOptions(args)
    if len(args) != 1:
        print('This tool takes a single tool-specific argument, the path to the\n'
              '.rc file to process.')
        return 2
    self.SetOptions(opts)

    path = args[0]
    out_path = os.path.join(util.dirname(path),
                            os.path.splitext(os.path.basename(path))[0] + '.grd')

    # try/finally (rather than 'with') keeps this usable on old Python 2
    # releases while still closing the streams when a step raises.
    rcfile = util.WrapInputStream(file(path, 'r'), self.input_encoding)
    try:
        rctext = rcfile.read()
        grd_text = unicode(self.Process(rctext, path))
    finally:
        rcfile.close()

    outfile = util.WrapOutputStream(file(out_path, 'w'), 'utf-8')
    try:
        outfile.write(grd_text)
    finally:
        outfile.close()

    print('Wrote output file %s.\nPlease check for TODO items in the file.' % out_path)
201
202
def Process(self, rctext, rc_path):
    '''Processes 'rctext' and returns a resource tree corresponding to it.

    Args:
      rctext: complete text of the rc file
      rc_path: path of the rc file; its directory is handed to the .grd
               parser and its base name is recorded on the structures

    Return:
      The root node of the new tree, or whatever the post-processing class
      returns when one is configured and can be instantiated.
    '''
    # Optional user-supplied hook that rewrites the RC text before parsing.
    if self.pre_process:
        preprocessor = util.NewClassInstance(self.pre_process,
                                             preprocess_interface.PreProcessor)
        if not preprocessor:
            self.Out(
                'PreProcessing class could not be found. Skipping preprocessing.\n')
        else:
            rctext = preprocessor.Process(rctext, rc_path)

    # Start with a basic skeleton for the .grd file
    skeleton = StringIO.StringIO(
        '''<?xml version="1.0" encoding="UTF-8"?>
        <grit base_dir="." latest_public_release="0"
            current_release="1" source_lang_id="en">
          <outputs />
          <translations />
          <release seq="1">
            <includes />
            <structures />
            <messages />
          </release>
        </grit>''')
    root = grd_reader.Parse(skeleton, util.dirname(rc_path))

    # The third child of the root is <release>; its children are the three
    # (still empty) containers, in the order given in the skeleton.
    includes = root.children[2].children[0]
    structures = root.children[2].children[1]
    messages = root.children[2].children[2]
    assert isinstance(includes, grit.node.empty.IncludesNode)
    assert isinstance(structures, grit.node.empty.StructuresNode)
    assert isinstance(messages, grit.node.empty.MessagesNode)

    self.AddIncludes(rctext, includes)
    self.AddStructures(rctext, structures, os.path.basename(rc_path))
    self.AddMessages(rctext, messages)

    self.VerboseOut('Validating that all IDs are unique...\n')
    root.ValidateUniqueIds()
    self.ExtraVerboseOut('Done validating that all IDs are unique.\n')

    # Optional user-supplied hook that may replace the finished tree.
    if self.post_process:
        postprocessor = util.NewClassInstance(self.post_process,
                                              postprocess_interface.PostProcessor)
        if not postprocessor:
            self.Out(
                'PostProcessing class could not be found. Skipping postprocessing.\n')
        else:
            root = postprocessor.Process(rctext, rc_path, root)

    return root
261
262
def AddIncludes(self, rctext, node):
    '''Adds an <include> child to 'node' for every file reference found in
    'rctext' (e.g. BITMAP, ICON), except those of type HTML.

    Args:
      rctext: text of the rc file to scan
      node: node that receives the new children
    '''
    for ref in _FILE_REF.finditer(rctext):
        res_id = ref.group('id')
        res_type = ref.group('type').upper()
        filename = rc.Section.UnEscape(ref.group('file'))
        assert '\n' not in filename
        if res_type == 'HTML':
            # HTML references are turned into structures by AddStructures().
            continue
        self.VerboseOut('Processing %s with ID %s (filename: %s)\n' %
                        (res_type, res_id, filename))
        node.AddChild(
            include.IncludeNode.Construct(node, res_id, res_type, filename))
274
275
def AddStructures(self, rctext, node, rc_filename):
    '''Adds a <structure> child to 'node' for every structured resource in
    'rctext': HTML file references first, then menus, dialogs and version
    information resources.

    Args:
      rctext: text of the rc file to scan
      node: node that receives the new children
      rc_filename: file name recorded on the menu, dialog and version
                   structures
    '''
    # First add HTML includes
    for ref in _FILE_REF.finditer(rctext):
        res_id = ref.group('id')
        res_type = ref.group('type').upper()
        filename = rc.Section.UnEscape(ref.group('file'))
        if res_type != 'HTML':
            continue
        node.AddChild(structure.StructureNode.Construct(
            node, res_id, self.html_type, filename, self.html_encoding))

    # Then add all RC includes, one kind after the other.
    rc_sections = (
        ('menu', _MENU),
        ('dialog', _DIALOG),
        ('version', _VERSIONINFO),
    )
    for (kind, pattern) in rc_sections:
        for section in pattern.finditer(rctext):
            res_id = section.group('id')
            self.VerboseOut('Processing %s with ID %s\n' % (kind, res_id))
            node.AddChild(structure.StructureNode.Construct(
                node, res_id, kind, rc_filename,
                encoding=self.input_encoding))
301
302
def AddMessages(self, rctext, node):
    '''Adds a <message> child to 'node' for every message of every string
    table in 'rctext'.

    Each message text goes through Placeholderize().  The comment lines
    that precede a message become its description.

    Args:
      rctext: text of the rc file to scan
      node: node that receives the new children
    '''
    for table in _STRING_TABLE.finditer(rctext):
        for entry in _MESSAGE.finditer(table.group('body')):
            comment_lines = [
                line.group('text')
                for line in _COMMENT_TEXT.finditer(entry.group('comment'))]
            description = ' '.join(comment_lines)

            msg_id = entry.group('id')
            text = rc.Section.UnEscape(entry.group('text'))

            self.VerboseOut(
                'Processing message %s (text: "%s")\n' % (msg_id, text))

            msg_obj = self.Placeholderize(text)

            # Messages that contain only placeholders do not need
            # translation: some non-blank literal text must be present.
            translateable = False
            for piece in msg_obj.GetContent():
                if (isinstance(piece, types.StringTypes) and
                        not _WHITESPACE_ONLY.match(piece)):
                    translateable = True

            # An explicit marker in the comment overrides the guess above.
            if self.not_localizable_re.search(description):
                translateable = False

            meaning = ''
            internal_comment = ''

            # If we have a "role model" (existing GRD file) and this node
            # exists in it, its description, meaning and translateable
            # attributes win.
            if self.role_model:
                role_node = self.role_model.GetNodeById(msg_id)
                if role_node:
                    translateable = role_node.IsTranslateable()
                    meaning = role_node.attrs['meaning']
                    description = role_node.attrs['desc']
                    internal_comment = role_node.attrs['internal_comment']

            # For nontranslateable messages, we don't want the complexity
            # of placeholderizing everything.
            if not translateable:
                msg_obj = tclib.Message(text=text)

            msg_node = message.MessageNode.Construct(
                node, msg_obj, msg_id,
                desc=description,
                translateable=translateable,
                meaning=meaning)
            msg_node.attrs['internal_comment'] = internal_comment

            node.AddChild(msg_node)
            self.ExtraVerboseOut('Done processing message %s\n' % msg_id)
362
363
def Placeholderize(self, text):
    '''Creates a tclib.Message object from 'text', attempting to recognize
    a few different formats of text that can be automatically placeholderized
    (HTML code, printf-style format strings, and FormatMessage-style format
    strings).

    Args:
      text: the unescaped message text

    Return:
      The HTML-derived message if it contains at least one placeholder;
      otherwise a message whose format specifiers are replaced by
      placeholders named TODO_0001, TODO_0002, ..., or the role model's
      match for that message when one is found.
    '''
    try:
        # First try HTML placeholderizing.
        # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing
        html_msg = tr_html.HtmlToMessage(text, True)
        for piece in html_msg.GetContent():
            if not isinstance(piece, types.StringTypes):
                # Contained at least one placeholder, so we're done
                return html_msg

        # HTML placeholderization didn't do anything, so try to find printf
        # or FormatMessage format specifiers and change them into
        # placeholders.
        msg = tclib.Message()
        pieces = _FORMAT_SPECIFIER.split(text)
        next_todo = 1  # We make placeholder IDs 'TODO_0001' etc.
        for piece in pieces:
            if _FORMAT_SPECIFIER.match(piece):
                placeholder = tclib.Placeholder(
                    'TODO_%04d' % next_todo, piece, 'TODO')
                msg.AppendPlaceholder(placeholder)
                next_todo += 1
            elif piece != '':
                msg.AppendText(piece)

        # More than one piece means the split found a specifier, i.e. there
        # are TODO placeholders the role model may be able to name.
        if self.role_model and len(pieces) > 1:
            candidate = self.role_model.UberClique().BestCliqueByOriginalText(
                msg.GetRealContent(), '')
            if candidate:
                # replace wholesale to get placeholder names and examples
                msg = candidate

        return msg
    except:
        # Nothing is swallowed: the text is reported and the error re-raised.
        print('Exception processing message with text "%s"' % text)
        raise
403
OLDNEW
« no previous file with comments | « grit/tool/preprocess_unittest.py ('k') | grit/tool/rc2grd_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698