Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(310)

Side by Side Diff: grit/tool/rc2grd.py

Issue 1442863002: Remove contents of grit's SVN repository. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « grit/tool/preprocess_unittest.py ('k') | grit/tool/rc2grd_unittest.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 '''The 'grit rc2grd' tool.'''
7
8
9 import os.path
10 import getopt
11 import re
12 import StringIO
13 import types
14
15 import grit.node.empty
16 from grit.node import include
17 from grit.node import structure
18 from grit.node import message
19
20 from grit.gather import rc
21 from grit.gather import tr_html
22
23 from grit.tool import interface
24 from grit.tool import postprocess_interface
25 from grit.tool import preprocess_interface
26
27 from grit import grd_reader
28 from grit import lazy_re
29 from grit import tclib
30 from grit import util
31
32
# NOTE: All patterns below are raw strings so that regex escapes such as \s,
# \A, \. and \$ reach the regex engine verbatim instead of relying on Python
# passing unknown string escapes through unchanged.

# Matches files referenced from an .rc file.  Captures the resource ID, the
# resource type and the quoted file name (which may contain "" as an escaped
# double quote).
_FILE_REF = lazy_re.compile(r'''
  ^(?P<id>[A-Z_0-9.]+)[ \t]+
  (?P<type>[A-Z_0-9]+)[ \t]+
  "(?P<file>.*?([^"]|""))"[ \t]*$''', re.VERBOSE | re.MULTILINE)


# Matches a dialog section (DIALOG or DIALOGEX), from its ID line through the
# first line consisting solely of END.
_DIALOG = lazy_re.compile(
    r'^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s*$.+?^END\s*$',
    re.MULTILINE | re.DOTALL)


# Matches a menu section
_MENU = lazy_re.compile(r'^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s*$.+?^END\s*$',
                        re.MULTILINE | re.DOTALL)


# Matches a versioninfo section
_VERSIONINFO = lazy_re.compile(
    r'^(?P<id>[A-Z0-9_]+)\s+VERSIONINFO\s.+?^BEGIN\s*$.+?^END\s*$',
    re.MULTILINE | re.DOTALL)


# Matches a stringtable; the text between BEGIN and END is captured as 'body'.
_STRING_TABLE = lazy_re.compile(
    (r'^STRINGTABLE(\s+(PRELOAD|DISCARDABLE|CHARACTERISTICS.+|LANGUAGE.+|'
     r'VERSION.+))*\s*\nBEGIN\s*$(?P<body>.+?)^END\s*$'),
    re.MULTILINE | re.DOTALL)


# Matches each message inside a stringtable, breaking it up into comments,
# the ID of the message, and the (RC-escaped) message text.
_MESSAGE = lazy_re.compile(r'''
  (?P<comment>(^\s+//.+?)*)  # 0 or more lines of comments preceding the message
  ^\s*
  (?P<id>[A-Za-z0-9_]+)  # id
  \s+
  "(?P<text>.*?([^"]|""))"([^"]|$)  # The message itself
  ''', re.MULTILINE | re.DOTALL | re.VERBOSE)


# Matches each line of comment text in a multi-line comment.
_COMMENT_TEXT = lazy_re.compile(r'^\s*//\s*(?P<text>.+?)$', re.MULTILINE)


# Matches a string that is empty or all whitespace
_WHITESPACE_ONLY = lazy_re.compile(r'\A\s*\Z', re.MULTILINE)


# Finds printf and FormatMessage style format specifiers
# Uses non-capturing groups except for the outermost group, so the output of
# re.split() should include both the normal text and what we intend to
# replace with placeholders.
# TODO(joi) Check documentation for printf (and Windows variants) and
# FormatMessage
_FORMAT_SPECIFIER = lazy_re.compile(
    r'(%[-# +]?(?:[0-9]*|\*)(?:\.(?:[0-9]+|\*))?(?:h|l|L)?'  # printf up to last char
    r'(?:d|i|o|u|x|X|e|E|f|F|g|G|c|r|s|ls|ws)'  # printf last char
    r'|\$[1-9][0-9]*)')  # FormatMessage
92
93
class Rc2Grd(interface.Tool):
  '''A tool for converting .rc files to .grd files. This tool is only for
converting the source (nontranslated) .rc file to a .grd file. For importing
existing translations, use the rc2xtb tool.

Usage: grit [global options] rc2grd [OPTIONS] RCFILE

The tool takes a single argument, which is the path to the .rc file to convert.
It outputs a .grd file with the same name in the same directory as the .rc file.
The .grd file may have one or more TODO comments for things that have to be
cleaned up manually.

OPTIONS may be any of the following:

  -e ENCODING    Specify the ENCODING of the .rc file. Default is 'cp1252'.

  -h TYPE        Specify the TYPE attribute for HTML structures.
                 Default is 'tr_html'.

  -u ENCODING    Specify the ENCODING of HTML files. Default is 'utf-8'.

  -n MATCH       Specify the regular expression to match in comments that will
                 indicate that the resource the comment belongs to is not
                 translateable. Default is 'Not locali(s|z)able'.

  -r GRDFILE     Specify that GRDFILE should be used as a "role model" for
                 any placeholders that otherwise would have had TODO names.
                 This attempts to find an identical message in the GRDFILE
                 and uses that instead of the automatically placeholderized
                 message.

  --pre CLASS    Specify an optional, fully qualified classname, which
                 has to be a subclass of grit.tool.PreProcessor, to
                 run on the text of the RC file before conversion occurs.
                 This can be used to support constructs in the RC files
                 that GRIT cannot handle on its own.

  --post CLASS   Specify an optional, fully qualified classname, which
                 has to be a subclass of grit.tool.PostProcessor, to
                 run on the text of the converted RC file.
                 This can be used to alter the content of the RC file
                 based on the conversion that occurred.

For menus, dialogs and version info, the .grd file will refer to the original
.rc file. Once conversion is complete, you can strip the original .rc file
of its string table and all comments as these will be available in the .grd
file.

Note that this tool WILL NOT obey C preprocessor rules, so even if something
is #if 0-ed out it will still be included in the output of this tool.
Therefore, if your .rc file contains sections like this, you should run the
C preprocessor on the .rc file or manually edit it before using this tool.
'''

  def ShortDescription(self):
    '''Returns a one-line description of this tool.'''
    return 'A tool for converting .rc source files to .grd files.'

  def __init__(self):
    '''Initializes every option to the default documented in the class
    docstring.'''
    self.input_encoding = 'cp1252'
    self.html_type = 'tr_html'
    self.html_encoding = 'utf-8'
    self.not_localizable_re = re.compile('Not locali(s|z)able')
    self.role_model = None    # Parsed GRD tree given with -r, if any.
    self.pre_process = None   # Class name given with --pre, if any.
    self.post_process = None  # Class name given with --post, if any.

  def ParseOptions(self, args):
    '''Given a list of arguments, set this object's options and return
    all non-option arguments.

    Args:
      args: list of command-line argument strings for this tool

    Return:
      The list of arguments left over once the options have been consumed.

    Raises:
      getopt.GetoptError: on an unrecognized option or a missing option value.
    '''
    # Every short option takes a value, including -r (GRDFILE); hence the
    # trailing colon after each letter.
    (own_opts, args) = getopt.getopt(args, 'e:h:u:n:r:', ['pre=', 'post='])
    for (key, val) in own_opts:
      if key == '-e':
        self.input_encoding = val
      elif key == '-h':
        self.html_type = val
      elif key == '-u':
        self.html_encoding = val
      elif key == '-n':
        self.not_localizable_re = re.compile(val)
      elif key == '-r':
        self.role_model = grd_reader.Parse(val)
      elif key == '--pre':
        self.pre_process = val
      elif key == '--post':
        self.post_process = val
    return args

  def Run(self, opts, args):
    '''Converts the .rc file named by the single non-option argument and
    writes the result next to it, with the extension replaced by '.grd'.

    Args:
      opts: global options, handed to self.SetOptions()
      args: tool-specific arguments (options followed by the .rc path)

    Return:
      2 if the number of non-option arguments is not exactly one; otherwise
      None.
    '''
    args = self.ParseOptions(args)
    if len(args) != 1:
      print('This tool takes a single tool-specific argument, the path to the\n'
            '.rc file to process.')
      return 2
    self.SetOptions(opts)

    path = args[0]
    grd_name = os.path.splitext(os.path.basename(path))[0] + '.grd'
    out_path = os.path.join(util.dirname(path), grd_name)

    rctext = util.ReadFile(path, self.input_encoding)
    grd_text = unicode(self.Process(rctext, path))
    with util.WrapOutputStream(open(out_path, 'w'), 'utf-8') as outfile:
      outfile.write(grd_text)

    print('Wrote output file %s.\nPlease check for TODO items in the file.' %
          out_path)

  def Process(self, rctext, rc_path):
    '''Processes 'rctext' and returns a resource tree corresponding to it.

    Args:
      rctext: complete text of the rc file
      rc_path: path of the rc file, e.g. 'resource/resource.rc'

    Return:
      grit.node.base.Node subclass
    '''

    if self.pre_process:
      preprocess_class = util.NewClassInstance(
          self.pre_process, preprocess_interface.PreProcessor)
      if preprocess_class:
        rctext = preprocess_class.Process(rctext, rc_path)
      else:
        self.Out(
            'PreProcessing class could not be found. Skipping preprocessing.\n')

    # Start with a basic skeleton for the .grd file
    root = grd_reader.Parse(StringIO.StringIO(
        '''<?xml version="1.0" encoding="UTF-8"?>
        <grit base_dir="." latest_public_release="0"
            current_release="1" source_lang_id="en">
          <outputs />
          <translations />
          <release seq="1">
            <includes />
            <structures />
            <messages />
          </release>
        </grit>'''), util.dirname(rc_path))
    # children[2] is the <release> element of the skeleton above; its three
    # children are the containers that get filled in below.
    release = root.children[2]
    includes = release.children[0]
    structures = release.children[1]
    messages = release.children[2]
    assert (isinstance(includes, grit.node.empty.IncludesNode) and
            isinstance(structures, grit.node.empty.StructuresNode) and
            isinstance(messages, grit.node.empty.MessagesNode))

    self.AddIncludes(rctext, includes)
    self.AddStructures(rctext, structures, os.path.basename(rc_path))
    self.AddMessages(rctext, messages)

    self.VerboseOut('Validating that all IDs are unique...\n')
    root.ValidateUniqueIds()
    self.ExtraVerboseOut('Done validating that all IDs are unique.\n')

    if self.post_process:
      postprocess_class = util.NewClassInstance(
          self.post_process, postprocess_interface.PostProcessor)
      if postprocess_class:
        root = postprocess_class.Process(rctext, rc_path, root)
      else:
        self.Out(
            'PostProcessing class could not be found. Skipping postprocessing.\n')

    return root

  def IsHtml(self, res_type, fname):
    '''Check whether both the type and file extension indicate HTML'''
    fext = fname.split('.')[-1].lower()
    return res_type == 'HTML' and fext in ('htm', 'html')

  def AddIncludes(self, rctext, node):
    '''Scans 'rctext' for included resources (e.g. BITMAP, ICON) and
    adds each included resource as an <include> child node of 'node'.
    HTML files are skipped here; AddStructures() handles them.'''
    for m in _FILE_REF.finditer(rctext):
      res_id = m.group('id')
      res_type = m.group('type').upper()
      fname = rc.Section.UnEscape(m.group('file'))
      assert fname.find('\n') == -1
      if not self.IsHtml(res_type, fname):
        self.VerboseOut('Processing %s with ID %s (filename: %s)\n' %
                        (res_type, res_id, fname))
        node.AddChild(
            include.IncludeNode.Construct(node, res_id, res_type, fname))

  def AddStructures(self, rctext, node, rc_filename):
    '''Scans 'rctext' for structured resources (e.g. menus, dialogs, version
    information resources and HTML templates) and adds each as a <structure>
    child of 'node'.

    Args:
      rctext: complete text of the rc file
      node: the node that receives the new <structure> children
      rc_filename: file name recorded on menu, dialog and version structures
    '''
    # First add HTML includes
    for m in _FILE_REF.finditer(rctext):
      res_id = m.group('id')
      res_type = m.group('type').upper()
      fname = rc.Section.UnEscape(m.group('file'))
      # Test the resource type parsed from the .rc file (not the 'type'
      # builtin), otherwise no HTML file would ever qualify.
      if self.IsHtml(res_type, fname):
        node.AddChild(structure.StructureNode.Construct(
            node, res_id, self.html_type, fname, self.html_encoding))

    # Then add all RC includes
    def AddStructure(structure_type, structure_id):
      self.VerboseOut('Processing %s with ID %s\n' %
                      (structure_type, structure_id))
      node.AddChild(structure.StructureNode.Construct(
          node, structure_id, structure_type, rc_filename,
          encoding=self.input_encoding))

    for m in _MENU.finditer(rctext):
      AddStructure('menu', m.group('id'))
    for m in _DIALOG.finditer(rctext):
      AddStructure('dialog', m.group('id'))
    for m in _VERSIONINFO.finditer(rctext):
      AddStructure('version', m.group('id'))

  def AddMessages(self, rctext, node):
    '''Scans 'rctext' for all messages in string tables, preprocesses them as
    much as possible for placeholders (e.g. messages containing $1, $2 or %s, %d
    type format specifiers get those specifiers replaced with placeholders, and
    HTML-formatted messages get run through the HTML-placeholderizer). Adds
    each message as a <message> node child of 'node'.'''
    for tm in _STRING_TABLE.finditer(rctext):
      table = tm.group('body')
      for mm in _MESSAGE.finditer(table):
        # Join the text of all comment lines preceding the message into one
        # description string.
        comment_text = ' '.join(
            cm.group('text')
            for cm in _COMMENT_TEXT.finditer(mm.group('comment')))

        msg_id = mm.group('id')
        text = rc.Section.UnEscape(mm.group('text'))

        self.VerboseOut('Processing message %s (text: "%s")\n' % (msg_id, text))

        msg_obj = self.Placeholderize(text)

        # Messages that contain only placeholders do not need translation.
        is_translateable = any(
            isinstance(item, types.StringTypes) and
            not _WHITESPACE_ONLY.match(item)
            for item in msg_obj.GetContent())

        if self.not_localizable_re.search(comment_text):
          is_translateable = False

        message_meaning = ''
        internal_comment = ''

        # If we have a "role model" (existing GRD file) and this node exists
        # in the role model, use the description, meaning and translateable
        # attributes from the role model.
        if self.role_model:
          role_node = self.role_model.GetNodeById(msg_id)
          if role_node:
            is_translateable = role_node.IsTranslateable()
            message_meaning = role_node.attrs['meaning']
            comment_text = role_node.attrs['desc']
            internal_comment = role_node.attrs['internal_comment']

        # For nontranslateable messages, we don't want the complexity of
        # placeholderizing everything.
        if not is_translateable:
          msg_obj = tclib.Message(text=text)

        msg_node = message.MessageNode.Construct(node, msg_obj, msg_id,
                                                 desc=comment_text,
                                                 translateable=is_translateable,
                                                 meaning=message_meaning)
        msg_node.attrs['internal_comment'] = internal_comment

        node.AddChild(msg_node)
        self.ExtraVerboseOut('Done processing message %s\n' % msg_id)

  def Placeholderize(self, text):
    '''Creates a tclib.Message object from 'text', attempting to recognize
    a few different formats of text that can be automatically placeholderized
    (HTML code, printf-style format strings, and FormatMessage-style format
    strings).

    Any exception raised while processing is reported on stdout together with
    the offending text and then re-raised.
    '''

    try:
      # First try HTML placeholderizing.
      # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing
      msg = tr_html.HtmlToMessage(text, True)
      for item in msg.GetContent():
        if not isinstance(item, types.StringTypes):
          return msg  # Contained at least one placeholder, so we're done

      # HTML placeholderization didn't do anything, so try to find printf or
      # FormatMessage format specifiers and change them into placeholders.
      msg = tclib.Message()
      # The pattern's single capturing group makes split() return the
      # specifiers interleaved with the surrounding text.
      parts = _FORMAT_SPECIFIER.split(text)
      todo_counter = 1  # We make placeholder IDs 'TODO_0001' etc.
      for part in parts:
        if _FORMAT_SPECIFIER.match(part):
          msg.AppendPlaceholder(tclib.Placeholder(
              'TODO_%04d' % todo_counter, part, 'TODO'))
          todo_counter += 1
        elif part != '':
          msg.AppendText(part)

      if self.role_model and len(parts) > 1:  # there are TODO placeholders
        role_model_msg = self.role_model.UberClique().BestCliqueByOriginalText(
            msg.GetRealContent(), '')
        if role_model_msg:
          # replace wholesale to get placeholder names and examples
          msg = role_model_msg

      return msg
    except:
      # Deliberately broad: the handler only adds context and re-raises.
      print('Exception processing message with text "%s"' % text)
      raise
409
OLDNEW
« no previous file with comments | « grit/tool/preprocess_unittest.py ('k') | grit/tool/rc2grd_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698