OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python2.4 |
| 2 # Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 '''The 'grit rc2grd' tool.''' |
| 7 |
| 8 |
| 9 import os.path |
| 10 import getopt |
| 11 import re |
| 12 import StringIO |
| 13 import types |
| 14 |
| 15 import grit.node.empty |
| 16 from grit.node import include |
| 17 from grit.node import structure |
| 18 from grit.node import message |
| 19 |
| 20 from grit.gather import rc |
| 21 from grit.gather import tr_html |
| 22 |
| 23 from grit.tool import interface |
| 24 from grit.tool import postprocess_interface |
| 25 from grit.tool import preprocess_interface |
| 26 |
| 27 from grit import grd_reader |
| 28 from grit import tclib |
| 29 from grit import util |
| 30 |
| 31 |
| 32 # Matches files referenced from an .rc file |
| 33 _FILE_REF = re.compile(''' |
| 34 ^(?P<id>[A-Z_0-9.]+)[ \t]+ |
| 35 (?P<type>[A-Z_0-9]+)[ \t]+ |
| 36 "(?P<file>.*?([^"]|""))"[ \t]*$''', re.VERBOSE | re.MULTILINE) |
| 37 |
| 38 |
| 39 # Matches a dialog section |
| 40 _DIALOG = re.compile('^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s*$.+?^END\s*
$', |
| 41 re.MULTILINE | re.DOTALL) |
| 42 |
| 43 |
| 44 # Matches a menu section |
| 45 _MENU = re.compile('^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s*$.+?^END\s*$', |
| 46 re.MULTILINE | re.DOTALL) |
| 47 |
| 48 |
| 49 # Matches a versioninfo section |
| 50 _VERSIONINFO = re.compile('^(?P<id>[A-Z0-9_]+)\s+VERSIONINFO\s.+?^BEGIN\s*$.+?^E
ND\s*$', |
| 51 re.MULTILINE | re.DOTALL) |
| 52 |
| 53 |
| 54 # Matches a stringtable |
| 55 _STRING_TABLE = re.compile('^STRINGTABLE(\s+(PRELOAD|DISCARDABLE|CHARACTERISTICS
.+|LANGUAGE.+|VERSION.+))*\s*\nBEGIN\s*$(?P<body>.+?)^END\s*$', |
| 56 re.MULTILINE | re.DOTALL) |
| 57 |
| 58 |
| 59 # Matches each message inside a stringtable, breaking it up into comments, |
| 60 # the ID of the message, and the (RC-escaped) message text. |
| 61 _MESSAGE = re.compile(''' |
| 62 (?P<comment>(^\s+//.+?)*) # 0 or more lines of comments preceding the message |
| 63 ^\s* |
| 64 (?P<id>[A-Za-z0-9_]+) # id |
| 65 \s+ |
| 66 "(?P<text>.*?([^"]|""))"([^"]|$) # The message itself |
| 67 ''', re.MULTILINE | re.DOTALL | re.VERBOSE) |
| 68 |
| 69 |
| 70 # Matches each line of comment text in a multi-line comment. |
| 71 _COMMENT_TEXT = re.compile('^\s*//\s*(?P<text>.+?)$', re.MULTILINE) |
| 72 |
| 73 |
| 74 # Matches a string that is empty or all whitespace |
| 75 _WHITESPACE_ONLY = re.compile('\A\s*\Z', re.MULTILINE) |
| 76 |
| 77 |
| 78 # Finds printf and FormatMessage style format specifiers |
| 79 # Uses non-capturing groups except for the outermost group, so the output of |
| 80 # re.split() should include both the normal text and what we intend to |
| 81 # replace with placeholders. |
| 82 # TODO(joi) Check documentation for printf (and Windows variants) and FormatMess
age |
| 83 _FORMAT_SPECIFIER = re.compile( |
| 84 '(%[-# +]?(?:[0-9]*|\*)(?:\.(?:[0-9]+|\*))?(?:h|l|L)?' # printf up to last cha
r |
| 85 '(?:d|i|o|u|x|X|e|E|f|F|g|G|c|r|s|ls|ws)' # printf last char |
| 86 '|\$[1-9][0-9]*)') # FormatMessage |
| 87 |
| 88 |
| 89 class Rc2Grd(interface.Tool): |
| 90 '''A tool for converting .rc files to .grd files. This tool is only for |
| 91 converting the source (nontranslated) .rc file to a .grd file. For importing |
| 92 existing translations, use the rc2xtb tool. |
| 93 |
| 94 Usage: grit [global options] rc2grd [OPTIONS] RCFILE |
| 95 |
| 96 The tool takes a single argument, which is the path to the .rc file to convert. |
| 97 It outputs a .grd file with the same name in the same directory as the .rc file. |
| 98 The .grd file may have one or more TODO comments for things that have to be |
| 99 cleaned up manually. |
| 100 |
| 101 OPTIONS may be any of the following: |
| 102 |
| 103 -e ENCODING Specify the ENCODING of the .rc file. Default is 'cp1252'. |
| 104 |
| 105 -h TYPE Specify the TYPE attribute for HTML structures. |
| 106 Default is 'tr_html'. |
| 107 |
| 108 -u ENCODING Specify the ENCODING of HTML files. Default is 'utf-8'. |
| 109 |
| 110 -n MATCH Specify the regular expression to match in comments that will |
| 111 indicate that the resource the comment belongs to is not |
| 112 translateable. Default is 'Not locali(s|z)able'. |
| 113 |
| 114 -r GRDFILE Specify that GRDFILE should be used as a "role model" for |
| 115 any placeholders that otherwise would have had TODO names. |
| 116 This attempts to find an identical message in the GRDFILE |
| 117 and uses that instead of the automatically placeholderized |
| 118 message. |
| 119 |
| 120 --pre CLASS Specify an optional, fully qualified classname, which |
| 121 has to be a subclass of grit.tool.PreProcessor, to |
| 122 run on the text of the RC file before conversion occurs. |
| 123 This can be used to support constructs in the RC files |
| 124 that GRIT cannot handle on its own. |
| 125 |
| 126 --post CLASS Specify an optional, fully qualified classname, which |
| 127 has to be a subclass of grit.tool.PostProcessor, to |
| 128 run on the text of the converted RC file. |
| 129 This can be used to alter the content of the RC file |
| 130 based on the conversion that occured. |
| 131 |
| 132 For menus, dialogs and version info, the .grd file will refer to the original |
| 133 .rc file. Once conversion is complete, you can strip the original .rc file |
| 134 of its string table and all comments as these will be available in the .grd |
| 135 file. |
| 136 |
| 137 Note that this tool WILL NOT obey C preprocessor rules, so even if something |
| 138 is #if 0-ed out it will still be included in the output of this tool |
| 139 Therefore, if your .rc file contains sections like this, you should run the |
| 140 C preprocessor on the .rc file or manually edit it before using this tool. |
| 141 ''' |
| 142 |
| 143 def ShortDescription(self): |
| 144 return 'A tool for converting .rc source files to .grd files.' |
| 145 |
| 146 def __init__(self): |
| 147 self.input_encoding = 'cp1252' |
| 148 self.html_type = 'tr_html' |
| 149 self.html_encoding = 'utf-8' |
| 150 self.not_localizable_re = re.compile('Not locali(s|z)able') |
| 151 self.role_model = None |
| 152 self.pre_process = None |
| 153 self.post_process = None |
| 154 |
| 155 def ParseOptions(self, args): |
| 156 '''Given a list of arguments, set this object's options and return |
| 157 all non-option arguments. |
| 158 ''' |
| 159 (own_opts, args) = getopt.getopt(args, 'e:h:u:n:r', ['pre=', 'post=']) |
| 160 for (key, val) in own_opts: |
| 161 if key == '-e': |
| 162 self.input_encoding = val |
| 163 elif key == '-h': |
| 164 self.html_type = val |
| 165 elif key == '-u': |
| 166 self.html_encoding = val |
| 167 elif key == '-n': |
| 168 self.not_localizable_re = re.compile(val) |
| 169 elif key == '-r': |
| 170 self.role_model = grd_reader.Parse(val) |
| 171 elif key == '--pre': |
| 172 self.pre_process = val |
| 173 elif key == '--post': |
| 174 self.post_process = val |
| 175 return args |
| 176 |
| 177 def Run(self, opts, args): |
| 178 args = self.ParseOptions(args) |
| 179 if len(args) != 1: |
| 180 print ('This tool takes a single tool-specific argument, the path to the\n
' |
| 181 '.rc file to process.') |
| 182 return 2 |
| 183 self.SetOptions(opts) |
| 184 |
| 185 path = args[0] |
| 186 out_path = os.path.join(util.dirname(path), |
| 187 os.path.splitext(os.path.basename(path))[0] + '.grd') |
| 188 |
| 189 rcfile = util.WrapInputStream(file(path, 'r'), self.input_encoding) |
| 190 rctext = rcfile.read() |
| 191 |
| 192 grd_text = unicode(self.Process(rctext, path)) |
| 193 |
| 194 rcfile.close() |
| 195 |
| 196 outfile = util.WrapOutputStream(file(out_path, 'w'), 'utf-8') |
| 197 outfile.write(grd_text) |
| 198 outfile.close() |
| 199 |
| 200 print 'Wrote output file %s.\nPlease check for TODO items in the file.' % ou
t_path |
| 201 |
| 202 |
| 203 def Process(self, rctext, rc_path): |
| 204 '''Processes 'rctext' and returns a resource tree corresponding to it. |
| 205 |
| 206 Args: |
| 207 rctext: complete text of the rc file |
| 208 rc_path: 'resource\resource.rc' |
| 209 |
| 210 Return: |
| 211 grit.node.base.Node subclass |
| 212 ''' |
| 213 |
| 214 if self.pre_process: |
| 215 preprocess_class = util.NewClassInstance(self.pre_process, |
| 216 preprocess_interface.PreProcessor
) |
| 217 if preprocess_class: |
| 218 rctext = preprocess_class.Process(rctext, rc_path) |
| 219 else: |
| 220 self.Out( |
| 221 'PreProcessing class could not be found. Skipping preprocessing.\n') |
| 222 |
| 223 # Start with a basic skeleton for the .grd file |
| 224 root = grd_reader.Parse(StringIO.StringIO( |
| 225 '''<?xml version="1.0" encoding="UTF-8"?> |
| 226 <grit base_dir="." latest_public_release="0" |
| 227 current_release="1" source_lang_id="en"> |
| 228 <outputs /> |
| 229 <translations /> |
| 230 <release seq="1"> |
| 231 <includes /> |
| 232 <structures /> |
| 233 <messages /> |
| 234 </release> |
| 235 </grit>'''), util.dirname(rc_path)) |
| 236 includes = root.children[2].children[0] |
| 237 structures = root.children[2].children[1] |
| 238 messages = root.children[2].children[2] |
| 239 assert (isinstance(includes, grit.node.empty.IncludesNode) and |
| 240 isinstance(structures, grit.node.empty.StructuresNode) and |
| 241 isinstance(messages, grit.node.empty.MessagesNode)) |
| 242 |
| 243 self.AddIncludes(rctext, includes) |
| 244 self.AddStructures(rctext, structures, os.path.basename(rc_path)) |
| 245 self.AddMessages(rctext, messages) |
| 246 |
| 247 self.VerboseOut('Validating that all IDs are unique...\n') |
| 248 root.ValidateUniqueIds() |
| 249 self.ExtraVerboseOut('Done validating that all IDs are unique.\n') |
| 250 |
| 251 if self.post_process: |
| 252 postprocess_class = util.NewClassInstance(self.post_process, |
| 253 postprocess_interface.PostProces
sor) |
| 254 if postprocess_class: |
| 255 root = postprocess_class.Process(rctext, rc_path, root) |
| 256 else: |
| 257 self.Out( |
| 258 'PostProcessing class could not be found. Skipping postprocessing.\n') |
| 259 |
| 260 return root |
| 261 |
| 262 |
| 263 def AddIncludes(self, rctext, node): |
| 264 '''Scans 'rctext' for included resources (e.g. BITMAP, ICON) and |
| 265 adds each included resource as an <include> child node of 'node'.''' |
| 266 for m in _FILE_REF.finditer(rctext): |
| 267 id = m.group('id') |
| 268 type = m.group('type').upper() |
| 269 fname = rc.Section.UnEscape(m.group('file')) |
| 270 assert fname.find('\n') == -1 |
| 271 if type != 'HTML': |
| 272 self.VerboseOut('Processing %s with ID %s (filename: %s)\n' % (type, id,
fname)) |
| 273 node.AddChild(include.IncludeNode.Construct(node, id, type, fname)) |
| 274 |
| 275 |
| 276 def AddStructures(self, rctext, node, rc_filename): |
| 277 '''Scans 'rctext' for structured resources (e.g. menus, dialogs, version |
| 278 information resources and HTML templates) and adds each as a <structure> |
| 279 child of 'node'.''' |
| 280 # First add HTML includes |
| 281 for m in _FILE_REF.finditer(rctext): |
| 282 id = m.group('id') |
| 283 type = m.group('type').upper() |
| 284 fname = rc.Section.UnEscape(m.group('file')) |
| 285 if type == 'HTML': |
| 286 node.AddChild(structure.StructureNode.Construct( |
| 287 node, id, self.html_type, fname, self.html_encoding)) |
| 288 |
| 289 # Then add all RC includes |
| 290 def AddStructure(type, id): |
| 291 self.VerboseOut('Processing %s with ID %s\n' % (type, id)) |
| 292 node.AddChild(structure.StructureNode.Construct(node, id, type, |
| 293 rc_filename, |
| 294 encoding=self.input_encodi
ng)) |
| 295 for m in _MENU.finditer(rctext): |
| 296 AddStructure('menu', m.group('id')) |
| 297 for m in _DIALOG.finditer(rctext): |
| 298 AddStructure('dialog', m.group('id')) |
| 299 for m in _VERSIONINFO.finditer(rctext): |
| 300 AddStructure('version', m.group('id')) |
| 301 |
| 302 |
| 303 def AddMessages(self, rctext, node): |
| 304 '''Scans 'rctext' for all messages in string tables, preprocesses them as |
| 305 much as possible for placeholders (e.g. messages containing $1, $2 or %s, %d |
| 306 type format specifiers get those specifiers replaced with placeholders, and |
| 307 HTML-formatted messages get run through the HTML-placeholderizer). Adds |
| 308 each message as a <message> node child of 'node'.''' |
| 309 for tm in _STRING_TABLE.finditer(rctext): |
| 310 table = tm.group('body') |
| 311 for mm in _MESSAGE.finditer(table): |
| 312 comment_block = mm.group('comment') |
| 313 comment_text = [] |
| 314 for cm in _COMMENT_TEXT.finditer(comment_block): |
| 315 comment_text.append(cm.group('text')) |
| 316 comment_text = ' '.join(comment_text) |
| 317 |
| 318 id = mm.group('id') |
| 319 text = rc.Section.UnEscape(mm.group('text')) |
| 320 |
| 321 self.VerboseOut('Processing message %s (text: "%s")\n' % (id, text)) |
| 322 |
| 323 msg_obj = self.Placeholderize(text) |
| 324 |
| 325 # Messages that contain only placeholders do not need translation. |
| 326 is_translateable = False |
| 327 for item in msg_obj.GetContent(): |
| 328 if isinstance(item, types.StringTypes): |
| 329 if not _WHITESPACE_ONLY.match(item): |
| 330 is_translateable = True |
| 331 |
| 332 if self.not_localizable_re.search(comment_text): |
| 333 is_translateable = False |
| 334 |
| 335 message_meaning = '' |
| 336 internal_comment = '' |
| 337 |
| 338 # If we have a "role model" (existing GRD file) and this node exists |
| 339 # in the role model, use the description, meaning and translateable |
| 340 # attributes from the role model. |
| 341 if self.role_model: |
| 342 role_node = self.role_model.GetNodeById(id) |
| 343 if role_node: |
| 344 is_translateable = role_node.IsTranslateable() |
| 345 message_meaning = role_node.attrs['meaning'] |
| 346 comment_text = role_node.attrs['desc'] |
| 347 internal_comment = role_node.attrs['internal_comment'] |
| 348 |
| 349 # For nontranslateable messages, we don't want the complexity of |
| 350 # placeholderizing everything. |
| 351 if not is_translateable: |
| 352 msg_obj = tclib.Message(text=text) |
| 353 |
| 354 msg_node = message.MessageNode.Construct(node, msg_obj, id, |
| 355 desc=comment_text, |
| 356 translateable=is_translateable, |
| 357 meaning=message_meaning) |
| 358 msg_node.attrs['internal_comment'] = internal_comment |
| 359 |
| 360 node.AddChild(msg_node) |
| 361 self.ExtraVerboseOut('Done processing message %s\n' % id) |
| 362 |
| 363 |
| 364 def Placeholderize(self, text): |
| 365 '''Creates a tclib.Message object from 'text', attempting to recognize |
| 366 a few different formats of text that can be automatically placeholderized |
| 367 (HTML code, printf-style format strings, and FormatMessage-style format |
| 368 strings). |
| 369 ''' |
| 370 |
| 371 try: |
| 372 # First try HTML placeholderizing. |
| 373 # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing |
| 374 msg = tr_html.HtmlToMessage(text, True) |
| 375 for item in msg.GetContent(): |
| 376 if not isinstance(item, types.StringTypes): |
| 377 return msg # Contained at least one placeholder, so we're done |
| 378 |
| 379 # HTML placeholderization didn't do anything, so try to find printf or |
| 380 # FormatMessage format specifiers and change them into placeholders. |
| 381 msg = tclib.Message() |
| 382 parts = _FORMAT_SPECIFIER.split(text) |
| 383 todo_counter = 1 # We make placeholder IDs 'TODO_0001' etc. |
| 384 for part in parts: |
| 385 if _FORMAT_SPECIFIER.match(part): |
| 386 msg.AppendPlaceholder(tclib.Placeholder( |
| 387 'TODO_%04d' % todo_counter, part, 'TODO')) |
| 388 todo_counter += 1 |
| 389 elif part != '': |
| 390 msg.AppendText(part) |
| 391 |
| 392 if self.role_model and len(parts) > 1: # there are TODO placeholders |
| 393 role_model_msg = self.role_model.UberClique().BestCliqueByOriginalText( |
| 394 msg.GetRealContent(), '') |
| 395 if role_model_msg: |
| 396 # replace wholesale to get placeholder names and examples |
| 397 msg = role_model_msg |
| 398 |
| 399 return msg |
| 400 except: |
| 401 print 'Exception processing message with text "%s"' % text |
| 402 raise |
| 403 |
OLD | NEW |