| Index: grit/tool/rc2grd.py
|
| ===================================================================
|
| --- grit/tool/rc2grd.py (revision 202)
|
| +++ grit/tool/rc2grd.py (working copy)
|
| @@ -1,409 +0,0 @@
|
| -#!/usr/bin/env python
|
| -# Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
| -# Use of this source code is governed by a BSD-style license that can be
|
| -# found in the LICENSE file.
|
| -
|
| -'''The 'grit rc2grd' tool.'''
|
| -
|
| -
|
| -import os.path
|
| -import getopt
|
| -import re
|
| -import StringIO
|
| -import types
|
| -
|
| -import grit.node.empty
|
| -from grit.node import include
|
| -from grit.node import structure
|
| -from grit.node import message
|
| -
|
| -from grit.gather import rc
|
| -from grit.gather import tr_html
|
| -
|
| -from grit.tool import interface
|
| -from grit.tool import postprocess_interface
|
| -from grit.tool import preprocess_interface
|
| -
|
| -from grit import grd_reader
|
| -from grit import lazy_re
|
| -from grit import tclib
|
| -from grit import util
|
| -
|
| -
|
| -# Matches files referenced from an .rc file
|
| -_FILE_REF = lazy_re.compile('''
|
| - ^(?P<id>[A-Z_0-9.]+)[ \t]+
|
| - (?P<type>[A-Z_0-9]+)[ \t]+
|
| - "(?P<file>.*?([^"]|""))"[ \t]*$''', re.VERBOSE | re.MULTILINE)
|
| -
|
| -
|
| -# Matches a dialog section
|
| -_DIALOG = lazy_re.compile(
|
| - '^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s*$.+?^END\s*$',
|
| - re.MULTILINE | re.DOTALL)
|
| -
|
| -
|
| -# Matches a menu section
|
| -_MENU = lazy_re.compile('^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s*$.+?^END\s*$',
|
| - re.MULTILINE | re.DOTALL)
|
| -
|
| -
|
| -# Matches a versioninfo section
|
| -_VERSIONINFO = lazy_re.compile(
|
| - '^(?P<id>[A-Z0-9_]+)\s+VERSIONINFO\s.+?^BEGIN\s*$.+?^END\s*$',
|
| - re.MULTILINE | re.DOTALL)
|
| -
|
| -
|
| -# Matches a stringtable
|
| -_STRING_TABLE = lazy_re.compile(
|
| - ('^STRINGTABLE(\s+(PRELOAD|DISCARDABLE|CHARACTERISTICS.+|LANGUAGE.+|'
|
| - 'VERSION.+))*\s*\nBEGIN\s*$(?P<body>.+?)^END\s*$'),
|
| - re.MULTILINE | re.DOTALL)
|
| -
|
| -
|
| -# Matches each message inside a stringtable, breaking it up into comments,
|
| -# the ID of the message, and the (RC-escaped) message text.
|
| -_MESSAGE = lazy_re.compile('''
|
| - (?P<comment>(^\s+//.+?)*) # 0 or more lines of comments preceding the message
|
| - ^\s*
|
| - (?P<id>[A-Za-z0-9_]+) # id
|
| - \s+
|
| - "(?P<text>.*?([^"]|""))"([^"]|$) # The message itself
|
| - ''', re.MULTILINE | re.DOTALL | re.VERBOSE)
|
| -
|
| -
|
| -# Matches each line of comment text in a multi-line comment.
|
| -_COMMENT_TEXT = lazy_re.compile('^\s*//\s*(?P<text>.+?)$', re.MULTILINE)
|
| -
|
| -
|
| -# Matches a string that is empty or all whitespace
|
| -_WHITESPACE_ONLY = lazy_re.compile('\A\s*\Z', re.MULTILINE)
|
| -
|
| -
|
| -# Finds printf and FormatMessage style format specifiers
|
| -# Uses non-capturing groups except for the outermost group, so the output of
|
| -# re.split() should include both the normal text and what we intend to
|
| -# replace with placeholders.
|
| -# TODO(joi) Check documentation for printf (and Windows variants) and FormatMessage
|
| -_FORMAT_SPECIFIER = lazy_re.compile(
|
| - '(%[-# +]?(?:[0-9]*|\*)(?:\.(?:[0-9]+|\*))?(?:h|l|L)?' # printf up to last char
|
| - '(?:d|i|o|u|x|X|e|E|f|F|g|G|c|r|s|ls|ws)' # printf last char
|
| - '|\$[1-9][0-9]*)') # FormatMessage
|
| -
|
| -
|
| -class Rc2Grd(interface.Tool):
|
| - '''A tool for converting .rc files to .grd files. This tool is only for
|
| -converting the source (nontranslated) .rc file to a .grd file. For importing
|
| -existing translations, use the rc2xtb tool.
|
| -
|
| -Usage: grit [global options] rc2grd [OPTIONS] RCFILE
|
| -
|
| -The tool takes a single argument, which is the path to the .rc file to convert.
|
| -It outputs a .grd file with the same name in the same directory as the .rc file.
|
| -The .grd file may have one or more TODO comments for things that have to be
|
| -cleaned up manually.
|
| -
|
| -OPTIONS may be any of the following:
|
| -
|
| - -e ENCODING Specify the ENCODING of the .rc file. Default is 'cp1252'.
|
| -
|
| - -h TYPE Specify the TYPE attribute for HTML structures.
|
| - Default is 'tr_html'.
|
| -
|
| - -u ENCODING Specify the ENCODING of HTML files. Default is 'utf-8'.
|
| -
|
| - -n MATCH Specify the regular expression to match in comments that will
|
| - indicate that the resource the comment belongs to is not
|
| - translateable. Default is 'Not locali(s|z)able'.
|
| -
|
| - -r GRDFILE Specify that GRDFILE should be used as a "role model" for
|
| - any placeholders that otherwise would have had TODO names.
|
| - This attempts to find an identical message in the GRDFILE
|
| - and uses that instead of the automatically placeholderized
|
| - message.
|
| -
|
| - --pre CLASS Specify an optional, fully qualified classname, which
|
| - has to be a subclass of grit.tool.PreProcessor, to
|
| - run on the text of the RC file before conversion occurs.
|
| - This can be used to support constructs in the RC files
|
| - that GRIT cannot handle on its own.
|
| -
|
| - --post CLASS Specify an optional, fully qualified classname, which
|
| - has to be a subclass of grit.tool.PostProcessor, to
|
| - run on the text of the converted RC file.
|
| - This can be used to alter the content of the RC file
|
| - based on the conversion that occured.
|
| -
|
| -For menus, dialogs and version info, the .grd file will refer to the original
|
| -.rc file. Once conversion is complete, you can strip the original .rc file
|
| -of its string table and all comments as these will be available in the .grd
|
| -file.
|
| -
|
| -Note that this tool WILL NOT obey C preprocessor rules, so even if something
|
| -is #if 0-ed out it will still be included in the output of this tool
|
| -Therefore, if your .rc file contains sections like this, you should run the
|
| -C preprocessor on the .rc file or manually edit it before using this tool.
|
| -'''
|
| -
|
| - def ShortDescription(self):
|
| - return 'A tool for converting .rc source files to .grd files.'
|
| -
|
| - def __init__(self):
|
| - self.input_encoding = 'cp1252'
|
| - self.html_type = 'tr_html'
|
| - self.html_encoding = 'utf-8'
|
| - self.not_localizable_re = re.compile('Not locali(s|z)able')
|
| - self.role_model = None
|
| - self.pre_process = None
|
| - self.post_process = None
|
| -
|
| - def ParseOptions(self, args):
|
| - '''Given a list of arguments, set this object's options and return
|
| - all non-option arguments.
|
| - '''
|
| - (own_opts, args) = getopt.getopt(args, 'e:h:u:n:r', ['pre=', 'post='])
|
| - for (key, val) in own_opts:
|
| - if key == '-e':
|
| - self.input_encoding = val
|
| - elif key == '-h':
|
| - self.html_type = val
|
| - elif key == '-u':
|
| - self.html_encoding = val
|
| - elif key == '-n':
|
| - self.not_localizable_re = re.compile(val)
|
| - elif key == '-r':
|
| - self.role_model = grd_reader.Parse(val)
|
| - elif key == '--pre':
|
| - self.pre_process = val
|
| - elif key == '--post':
|
| - self.post_process = val
|
| - return args
|
| -
|
| - def Run(self, opts, args):
|
| - args = self.ParseOptions(args)
|
| - if len(args) != 1:
|
| - print ('This tool takes a single tool-specific argument, the path to the\n'
|
| - '.rc file to process.')
|
| - return 2
|
| - self.SetOptions(opts)
|
| -
|
| - path = args[0]
|
| - out_path = os.path.join(util.dirname(path),
|
| - os.path.splitext(os.path.basename(path))[0] + '.grd')
|
| -
|
| - rctext = util.ReadFile(path, self.input_encoding)
|
| - grd_text = unicode(self.Process(rctext, path))
|
| - with util.WrapOutputStream(file(out_path, 'w'), 'utf-8') as outfile:
|
| - outfile.write(grd_text)
|
| -
|
| - print 'Wrote output file %s.\nPlease check for TODO items in the file.' % out_path
|
| -
|
| -
|
| - def Process(self, rctext, rc_path):
|
| - '''Processes 'rctext' and returns a resource tree corresponding to it.
|
| -
|
| - Args:
|
| - rctext: complete text of the rc file
|
| - rc_path: 'resource\resource.rc'
|
| -
|
| - Return:
|
| - grit.node.base.Node subclass
|
| - '''
|
| -
|
| - if self.pre_process:
|
| - preprocess_class = util.NewClassInstance(self.pre_process,
|
| - preprocess_interface.PreProcessor)
|
| - if preprocess_class:
|
| - rctext = preprocess_class.Process(rctext, rc_path)
|
| - else:
|
| - self.Out(
|
| - 'PreProcessing class could not be found. Skipping preprocessing.\n')
|
| -
|
| - # Start with a basic skeleton for the .grd file
|
| - root = grd_reader.Parse(StringIO.StringIO(
|
| - '''<?xml version="1.0" encoding="UTF-8"?>
|
| - <grit base_dir="." latest_public_release="0"
|
| - current_release="1" source_lang_id="en">
|
| - <outputs />
|
| - <translations />
|
| - <release seq="1">
|
| - <includes />
|
| - <structures />
|
| - <messages />
|
| - </release>
|
| - </grit>'''), util.dirname(rc_path))
|
| - includes = root.children[2].children[0]
|
| - structures = root.children[2].children[1]
|
| - messages = root.children[2].children[2]
|
| - assert (isinstance(includes, grit.node.empty.IncludesNode) and
|
| - isinstance(structures, grit.node.empty.StructuresNode) and
|
| - isinstance(messages, grit.node.empty.MessagesNode))
|
| -
|
| - self.AddIncludes(rctext, includes)
|
| - self.AddStructures(rctext, structures, os.path.basename(rc_path))
|
| - self.AddMessages(rctext, messages)
|
| -
|
| - self.VerboseOut('Validating that all IDs are unique...\n')
|
| - root.ValidateUniqueIds()
|
| - self.ExtraVerboseOut('Done validating that all IDs are unique.\n')
|
| -
|
| - if self.post_process:
|
| - postprocess_class = util.NewClassInstance(self.post_process,
|
| - postprocess_interface.PostProcessor)
|
| - if postprocess_class:
|
| - root = postprocess_class.Process(rctext, rc_path, root)
|
| - else:
|
| - self.Out(
|
| - 'PostProcessing class could not be found. Skipping postprocessing.\n')
|
| -
|
| - return root
|
| -
|
| -
|
| - def IsHtml(self, res_type, fname):
|
| - '''Check whether both the type and file extension indicate HTML'''
|
| - fext = fname.split('.')[-1].lower()
|
| - return res_type == 'HTML' and fext in ('htm', 'html')
|
| -
|
| -
|
| - def AddIncludes(self, rctext, node):
|
| - '''Scans 'rctext' for included resources (e.g. BITMAP, ICON) and
|
| - adds each included resource as an <include> child node of 'node'.'''
|
| - for m in _FILE_REF.finditer(rctext):
|
| - id = m.group('id')
|
| - res_type = m.group('type').upper()
|
| - fname = rc.Section.UnEscape(m.group('file'))
|
| - assert fname.find('\n') == -1
|
| - if not self.IsHtml(res_type, fname):
|
| - self.VerboseOut('Processing %s with ID %s (filename: %s)\n' %
|
| - (res_type, id, fname))
|
| - node.AddChild(include.IncludeNode.Construct(node, id, res_type, fname))
|
| -
|
| -
|
| - def AddStructures(self, rctext, node, rc_filename):
|
| - '''Scans 'rctext' for structured resources (e.g. menus, dialogs, version
|
| - information resources and HTML templates) and adds each as a <structure>
|
| - child of 'node'.'''
|
| - # First add HTML includes
|
| - for m in _FILE_REF.finditer(rctext):
|
| - id = m.group('id')
|
| - res_type = m.group('type').upper()
|
| - fname = rc.Section.UnEscape(m.group('file'))
|
| - if self.IsHtml(type, fname):
|
| - node.AddChild(structure.StructureNode.Construct(
|
| - node, id, self.html_type, fname, self.html_encoding))
|
| -
|
| - # Then add all RC includes
|
| - def AddStructure(res_type, id):
|
| - self.VerboseOut('Processing %s with ID %s\n' % (res_type, id))
|
| - node.AddChild(structure.StructureNode.Construct(node, id, res_type,
|
| - rc_filename,
|
| - encoding=self.input_encoding))
|
| - for m in _MENU.finditer(rctext):
|
| - AddStructure('menu', m.group('id'))
|
| - for m in _DIALOG.finditer(rctext):
|
| - AddStructure('dialog', m.group('id'))
|
| - for m in _VERSIONINFO.finditer(rctext):
|
| - AddStructure('version', m.group('id'))
|
| -
|
| -
|
| - def AddMessages(self, rctext, node):
|
| - '''Scans 'rctext' for all messages in string tables, preprocesses them as
|
| - much as possible for placeholders (e.g. messages containing $1, $2 or %s, %d
|
| - type format specifiers get those specifiers replaced with placeholders, and
|
| - HTML-formatted messages get run through the HTML-placeholderizer). Adds
|
| - each message as a <message> node child of 'node'.'''
|
| - for tm in _STRING_TABLE.finditer(rctext):
|
| - table = tm.group('body')
|
| - for mm in _MESSAGE.finditer(table):
|
| - comment_block = mm.group('comment')
|
| - comment_text = []
|
| - for cm in _COMMENT_TEXT.finditer(comment_block):
|
| - comment_text.append(cm.group('text'))
|
| - comment_text = ' '.join(comment_text)
|
| -
|
| - id = mm.group('id')
|
| - text = rc.Section.UnEscape(mm.group('text'))
|
| -
|
| - self.VerboseOut('Processing message %s (text: "%s")\n' % (id, text))
|
| -
|
| - msg_obj = self.Placeholderize(text)
|
| -
|
| - # Messages that contain only placeholders do not need translation.
|
| - is_translateable = False
|
| - for item in msg_obj.GetContent():
|
| - if isinstance(item, types.StringTypes):
|
| - if not _WHITESPACE_ONLY.match(item):
|
| - is_translateable = True
|
| -
|
| - if self.not_localizable_re.search(comment_text):
|
| - is_translateable = False
|
| -
|
| - message_meaning = ''
|
| - internal_comment = ''
|
| -
|
| - # If we have a "role model" (existing GRD file) and this node exists
|
| - # in the role model, use the description, meaning and translateable
|
| - # attributes from the role model.
|
| - if self.role_model:
|
| - role_node = self.role_model.GetNodeById(id)
|
| - if role_node:
|
| - is_translateable = role_node.IsTranslateable()
|
| - message_meaning = role_node.attrs['meaning']
|
| - comment_text = role_node.attrs['desc']
|
| - internal_comment = role_node.attrs['internal_comment']
|
| -
|
| - # For nontranslateable messages, we don't want the complexity of
|
| - # placeholderizing everything.
|
| - if not is_translateable:
|
| - msg_obj = tclib.Message(text=text)
|
| -
|
| - msg_node = message.MessageNode.Construct(node, msg_obj, id,
|
| - desc=comment_text,
|
| - translateable=is_translateable,
|
| - meaning=message_meaning)
|
| - msg_node.attrs['internal_comment'] = internal_comment
|
| -
|
| - node.AddChild(msg_node)
|
| - self.ExtraVerboseOut('Done processing message %s\n' % id)
|
| -
|
| -
|
| - def Placeholderize(self, text):
|
| - '''Creates a tclib.Message object from 'text', attempting to recognize
|
| - a few different formats of text that can be automatically placeholderized
|
| - (HTML code, printf-style format strings, and FormatMessage-style format
|
| - strings).
|
| - '''
|
| -
|
| - try:
|
| - # First try HTML placeholderizing.
|
| - # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing
|
| - msg = tr_html.HtmlToMessage(text, True)
|
| - for item in msg.GetContent():
|
| - if not isinstance(item, types.StringTypes):
|
| - return msg # Contained at least one placeholder, so we're done
|
| -
|
| - # HTML placeholderization didn't do anything, so try to find printf or
|
| - # FormatMessage format specifiers and change them into placeholders.
|
| - msg = tclib.Message()
|
| - parts = _FORMAT_SPECIFIER.split(text)
|
| - todo_counter = 1 # We make placeholder IDs 'TODO_0001' etc.
|
| - for part in parts:
|
| - if _FORMAT_SPECIFIER.match(part):
|
| - msg.AppendPlaceholder(tclib.Placeholder(
|
| - 'TODO_%04d' % todo_counter, part, 'TODO'))
|
| - todo_counter += 1
|
| - elif part != '':
|
| - msg.AppendText(part)
|
| -
|
| - if self.role_model and len(parts) > 1: # there are TODO placeholders
|
| - role_model_msg = self.role_model.UberClique().BestCliqueByOriginalText(
|
| - msg.GetRealContent(), '')
|
| - if role_model_msg:
|
| - # replace wholesale to get placeholder names and examples
|
| - msg = role_model_msg
|
| -
|
| - return msg
|
| - except:
|
| - print 'Exception processing message with text "%s"' % text
|
| - raise
|
| -
|
|
|