Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(286)

Unified Diff: grit/tool/rc2grd.py

Issue 7994004: Initial source commit to grit-i18n project. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/
Patch Set: Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « grit/tool/preprocess_unittest.py ('k') | grit/tool/rc2grd_unittest.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: grit/tool/rc2grd.py
===================================================================
--- grit/tool/rc2grd.py (revision 0)
+++ grit/tool/rc2grd.py (revision 0)
@@ -0,0 +1,403 @@
+#!/usr/bin/python2.4
+# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+'''The 'grit rc2grd' tool.'''
+
+
+import os.path
+import getopt
+import re
+import StringIO
+import types
+
+import grit.node.empty
+from grit.node import include
+from grit.node import structure
+from grit.node import message
+
+from grit.gather import rc
+from grit.gather import tr_html
+
+from grit.tool import interface
+from grit.tool import postprocess_interface
+from grit.tool import preprocess_interface
+
+from grit import grd_reader
+from grit import tclib
+from grit import util
+
+
+# Matches files referenced from an .rc file, i.e. lines of the form
+#   ID TYPE "filename"
+# Named groups: 'id' (the resource ID), 'type' (the resource type keyword) and
+# 'file' (the quoted file name, still RC-escaped; the callers in this file run
+# it through rc.Section.UnEscape).
+_FILE_REF = re.compile('''
+ ^(?P<id>[A-Z_0-9.]+)[ \t]+
+ (?P<type>[A-Z_0-9]+)[ \t]+
+ "(?P<file>.*?([^"]|""))"[ \t]*$''', re.VERBOSE | re.MULTILINE)
+
+
+# Matches a dialog section (DIALOG or DIALOGEX), from the line that starts
+# with the resource ID up to and including the first line that starts with
+# END. Named group: 'id'.
+_DIALOG = re.compile('^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s*$.+?^END\s*$',
+ re.MULTILINE | re.DOTALL)
+
+
+# Matches a menu section. Only an END at the very start of a line terminates
+# the match, so indented END lines inside the section do not end it. The
+# keyword is not followed by \s, so types that merely start with MENU
+# (e.g. MENUEX) match as well. Named group: 'id'.
+_MENU = re.compile('^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s*$.+?^END\s*$',
+ re.MULTILINE | re.DOTALL)
+
+
+# Matches a versioninfo section, from the line that starts with the resource
+# ID up to and including the first line that starts with END.
+# Named group: 'id'.
+_VERSIONINFO = re.compile('^(?P<id>[A-Z0-9_]+)\s+VERSIONINFO\s.+?^BEGIN\s*$.+?^END\s*$',
+ re.MULTILINE | re.DOTALL)
+
+
+# Matches a stringtable, including any PRELOAD, DISCARDABLE, CHARACTERISTICS,
+# LANGUAGE or VERSION attributes that precede BEGIN. Named group 'body' is the
+# text between the BEGIN line and the first line that starts with END.
+_STRING_TABLE = re.compile('^STRINGTABLE(\s+(PRELOAD|DISCARDABLE|CHARACTERISTICS.+|LANGUAGE.+|VERSION.+))*\s*\nBEGIN\s*$(?P<body>.+?)^END\s*$',
+ re.MULTILINE | re.DOTALL)
+
+
+# Matches each message inside a stringtable, breaking it up into comments,
+# the ID of the message, and the (RC-escaped) message text.
+# Named groups: 'comment', 'id' and 'text'. The closing quote must not be
+# followed by another quote, so a doubled quote ("") inside the text does not
+# end the match.
+_MESSAGE = re.compile('''
+ (?P<comment>(^\s+//.+?)*) # 0 or more lines of comments preceding the message
+ ^\s*
+ (?P<id>[A-Za-z0-9_]+) # id
+ \s+
+ "(?P<text>.*?([^"]|""))"([^"]|$) # The message itself
+ ''', re.MULTILINE | re.DOTALL | re.VERBOSE)
+
+
+# Matches each line of comment text in a multi-line comment. Named group
+# 'text' is the line without the leading '//' and surrounding whitespace.
+_COMMENT_TEXT = re.compile('^\s*//\s*(?P<text>.+?)$', re.MULTILINE)
+
+
+# Matches a string that is empty or all whitespace
+_WHITESPACE_ONLY = re.compile('\A\s*\Z', re.MULTILINE)
+
+
+# Finds printf and FormatMessage style format specifiers
+# Uses non-capturing groups except for the outermost group, so the output of
+# re.split() should include both the normal text and what we intend to
+# replace with placeholders.
+# The last alternative matches '$' followed by a positive integer without a
+# leading zero (e.g. $1, $12).
+# TODO(joi) Check documentation for printf (and Windows variants) and FormatMessage
+_FORMAT_SPECIFIER = re.compile(
+ '(%[-# +]?(?:[0-9]*|\*)(?:\.(?:[0-9]+|\*))?(?:h|l|L)?' # printf up to last char
+ '(?:d|i|o|u|x|X|e|E|f|F|g|G|c|r|s|ls|ws)' # printf last char
+ '|\$[1-9][0-9]*)') # FormatMessage
+
+
+class Rc2Grd(interface.Tool):
+ '''A tool for converting .rc files to .grd files. This tool is only for
+converting the source (nontranslated) .rc file to a .grd file. For importing
+existing translations, use the rc2xtb tool.
+
+Usage: grit [global options] rc2grd [OPTIONS] RCFILE
+
+The tool takes a single argument, which is the path to the .rc file to convert.
+It outputs a .grd file with the same name in the same directory as the .rc file.
+The .grd file may have one or more TODO comments for things that have to be
+cleaned up manually.
+
+OPTIONS may be any of the following:
+
+ -e ENCODING Specify the ENCODING of the .rc file. Default is 'cp1252'.
+
+ -h TYPE Specify the TYPE attribute for HTML structures.
+ Default is 'tr_html'.
+
+ -u ENCODING Specify the ENCODING of HTML files. Default is 'utf-8'.
+
+ -n MATCH Specify the regular expression to match in comments that will
+ indicate that the resource the comment belongs to is not
+ translateable. Default is 'Not locali(s|z)able'.
+
+ -r GRDFILE Specify that GRDFILE should be used as a "role model" for
+ any placeholders that otherwise would have had TODO names.
+ This attempts to find an identical message in the GRDFILE
+ and uses that instead of the automatically placeholderized
+ message.
+
+ --pre CLASS Specify an optional, fully qualified classname, which
+ has to be a subclass of grit.tool.PreProcessor, to
+ run on the text of the RC file before conversion occurs.
+ This can be used to support constructs in the RC files
+ that GRIT cannot handle on its own.
+
+ --post CLASS Specify an optional, fully qualified classname, which
+ has to be a subclass of grit.tool.PostProcessor, to
+ run on the text of the converted RC file.
+ This can be used to alter the content of the RC file
+ based on the conversion that occurred.
+
+For menus, dialogs and version info, the .grd file will refer to the original
+.rc file. Once conversion is complete, you can strip the original .rc file
+of its string table and all comments as these will be available in the .grd
+file.
+
+Note that this tool WILL NOT obey C preprocessor rules, so even if something
+is #if 0-ed out it will still be included in the output of this tool.
+Therefore, if your .rc file contains sections like this, you should run the
+C preprocessor on the .rc file or manually edit it before using this tool.
+'''
+
+  def ShortDescription(self):
+    '''Returns the one-line summary of what this tool does.'''
+    summary = 'A tool for converting .rc source files to .grd files.'
+    return summary
+
+  def __init__(self):
+    '''Sets every option to its default; ParseOptions() overrides them.'''
+    # Optional hooks and role model; all disabled until requested
+    # (--pre, --post and -r respectively).
+    self.pre_process = None
+    self.post_process = None
+    self.role_model = None
+
+    # Encodings: the .rc file itself (-e) and referenced HTML files (-u).
+    self.input_encoding = 'cp1252'
+    self.html_encoding = 'utf-8'
+
+    # The 'type' attribute given to HTML structures (-h).
+    self.html_type = 'tr_html'
+
+    # A comment matching this marks its message as not translateable (-n).
+    self.not_localizable_re = re.compile('Not locali(s|z)able')
+
+  def ParseOptions(self, args):
+    '''Given a list of arguments, set this object's options and return
+    all non-option arguments.
+
+    Args:
+      args: list of tool-specific command-line arguments
+
+    Return:
+      list of the arguments left over once the options have been consumed
+
+    Raises:
+      getopt.GetoptError for an unknown option or a missing option value
+    '''
+    # Every short option takes a value, so each letter needs a trailing ':'.
+    # Without the colon after 'r', getopt reports '-r' with an empty value and
+    # leaves GRDFILE among the positional arguments.
+    (own_opts, args) = getopt.getopt(args, 'e:h:u:n:r:', ['pre=', 'post='])
+    for (key, val) in own_opts:
+      if key == '-e':
+        self.input_encoding = val
+      elif key == '-h':
+        self.html_type = val
+      elif key == '-u':
+        self.html_encoding = val
+      elif key == '-n':
+        self.not_localizable_re = re.compile(val)
+      elif key == '-r':
+        # The role model is parsed right away; 'val' is the path to GRDFILE.
+        self.role_model = grd_reader.Parse(val)
+      elif key == '--pre':
+        # Only the class name is stored here; Process() instantiates it.
+        self.pre_process = val
+      elif key == '--post':
+        self.post_process = val
+    return args
+
+  def Run(self, opts, args):
+    '''Converts the .rc file named in 'args' to a .grd file with the same base
+    name in the same directory.
+
+    Args:
+      opts: global options, handed to self.SetOptions()
+      args: tool-specific command-line arguments; once the options have been
+            parsed exactly one must remain, the path to the .rc file
+
+    Return:
+      2 if the wrong number of arguments was given, otherwise None.
+    '''
+    args = self.ParseOptions(args)
+    if len(args) != 1:
+      print ('This tool takes a single tool-specific argument, the path to the\n'
+             '.rc file to process.')
+      return 2
+    self.SetOptions(opts)
+
+    path = args[0]
+    out_path = os.path.join(util.dirname(path),
+                            os.path.splitext(os.path.basename(path))[0] + '.grd')
+
+    # The whole text is read up front, so the input can be closed before the
+    # conversion starts; try/finally releases it even if reading fails.
+    rcfile = util.WrapInputStream(file(path, 'r'), self.input_encoding)
+    try:
+      rctext = rcfile.read()
+    finally:
+      rcfile.close()
+
+    grd_text = unicode(self.Process(rctext, path))
+
+    # The output file is only opened once conversion has succeeded, and is
+    # closed even if the write fails.
+    outfile = util.WrapOutputStream(file(out_path, 'w'), 'utf-8')
+    try:
+      outfile.write(grd_text)
+    finally:
+      outfile.close()
+
+    print 'Wrote output file %s.\nPlease check for TODO items in the file.' % out_path
+
+
+ def Process(self, rctext, rc_path):
+ '''Processes 'rctext' and returns a resource tree corresponding to it.
+
+ Runs the optional preprocessor, parses a skeleton .grd document, fills it
+ with the includes, structures and messages found in 'rctext', validates
+ that all IDs are unique and finally runs the optional postprocessor.
+
+ Args:
+ rctext: complete text of the rc file
+ rc_path: path to the rc file, e.g. 'resource/resource.rc'
+
+ Return:
+ grit.node.base.Node subclass
+ '''
+
+ # self.pre_process is the class name given with --pre, if any.
+ if self.pre_process:
+ preprocess_class = util.NewClassInstance(self.pre_process,
+ preprocess_interface.PreProcessor)
+ if preprocess_class:
+ # The preprocessor's result replaces the rc text for the rest of the run.
+ rctext = preprocess_class.Process(rctext, rc_path)
+ else:
+ self.Out(
+ 'PreProcessing class could not be found. Skipping preprocessing.\n')
+
+ # Start with a basic skeleton for the .grd file
+ root = grd_reader.Parse(StringIO.StringIO(
+ '''<?xml version="1.0" encoding="UTF-8"?>
+ <grit base_dir="." latest_public_release="0"
+ current_release="1" source_lang_id="en">
+ <outputs />
+ <translations />
+ <release seq="1">
+ <includes />
+ <structures />
+ <messages />
+ </release>
+ </grit>'''), util.dirname(rc_path))
+ # In the skeleton above <release> is the third child of <grit> (after
+ # <outputs> and <translations>), and <includes>, <structures> and <messages>
+ # are its children in that order; the assert below checks the node types.
+ includes = root.children[2].children[0]
+ structures = root.children[2].children[1]
+ messages = root.children[2].children[2]
+ assert (isinstance(includes, grit.node.empty.IncludesNode) and
+ isinstance(structures, grit.node.empty.StructuresNode) and
+ isinstance(messages, grit.node.empty.MessagesNode))
+
+ # Structures refer back to the .rc file by its base name only.
+ self.AddIncludes(rctext, includes)
+ self.AddStructures(rctext, structures, os.path.basename(rc_path))
+ self.AddMessages(rctext, messages)
+
+ self.VerboseOut('Validating that all IDs are unique...\n')
+ root.ValidateUniqueIds()
+ self.ExtraVerboseOut('Done validating that all IDs are unique.\n')
+
+ # self.post_process is the class name given with --post, if any.
+ if self.post_process:
+ postprocess_class = util.NewClassInstance(self.post_process,
+ postprocess_interface.PostProcessor)
+ if postprocess_class:
+ # Whatever the postprocessor returns becomes the tree that is returned.
+ root = postprocess_class.Process(rctext, rc_path, root)
+ else:
+ self.Out(
+ 'PostProcessing class could not be found. Skipping postprocessing.\n')
+
+ return root
+
+
+  def AddIncludes(self, rctext, node):
+    '''Adds an <include> child to 'node' for every file reference found in
+    'rctext' (e.g. BITMAP, ICON) whose resource type is not HTML.'''
+    for ref in _FILE_REF.finditer(rctext):
+      res_type = ref.group('type').upper()
+      res_id = ref.group('id')
+      fname = rc.Section.UnEscape(ref.group('file'))
+      assert '\n' not in fname
+      if res_type == 'HTML':
+        # HTML references are not emitted as <include> nodes.
+        continue
+      self.VerboseOut('Processing %s with ID %s (filename: %s)\n' %
+                      (res_type, res_id, fname))
+      include_node = include.IncludeNode.Construct(node, res_id, res_type, fname)
+      node.AddChild(include_node)
+
+
+  def AddStructures(self, rctext, node, rc_filename):
+    '''Adds a <structure> child to 'node' for every structured resource found
+    in 'rctext': HTML templates first, then menus, dialogs and version
+    information resources, in that order.'''
+    # HTML templates are file references whose resource type is HTML.
+    for ref in _FILE_REF.finditer(rctext):
+      html_id = ref.group('id')
+      res_type = ref.group('type').upper()
+      html_file = rc.Section.UnEscape(ref.group('file'))
+      if res_type != 'HTML':
+        continue
+      html_node = structure.StructureNode.Construct(
+          node, html_id, self.html_type, html_file, self.html_encoding)
+      node.AddChild(html_node)
+
+    # Sections that live in the .rc file itself: each structure points back at
+    # 'rc_filename' and is decoded with the .rc file's encoding.
+    section_kinds = (
+        ('menu', _MENU),
+        ('dialog', _DIALOG),
+        ('version', _VERSIONINFO),
+    )
+    for (kind, pattern) in section_kinds:
+      for section in pattern.finditer(rctext):
+        section_id = section.group('id')
+        self.VerboseOut('Processing %s with ID %s\n' % (kind, section_id))
+        node.AddChild(structure.StructureNode.Construct(
+            node, section_id, kind, rc_filename,
+            encoding=self.input_encoding))
+
+
+  def AddMessages(self, rctext, node):
+    '''Adds a <message> child to 'node' for every message in every string
+    table found in 'rctext'.
+
+    Each message is run through Placeholderize(), so $1/$2 or %s/%d style
+    format specifiers and HTML markup become placeholders. Messages that are
+    not translateable keep their plain text instead.'''
+    for table_match in _STRING_TABLE.finditer(rctext):
+      body = table_match.group('body')
+      for entry in _MESSAGE.finditer(body):
+        # Join the comment lines preceding the message into one description.
+        comment_text = ' '.join(
+            [line.group('text')
+             for line in _COMMENT_TEXT.finditer(entry.group('comment'))])
+
+        msg_id = entry.group('id')
+        text = rc.Section.UnEscape(entry.group('text'))
+
+        self.VerboseOut('Processing message %s (text: "%s")\n' % (msg_id, text))
+
+        msg_obj = self.Placeholderize(text)
+
+        # A message needs translation only if it has some text that is not
+        # whitespace; placeholders alone do not count.
+        is_translateable = False
+        for piece in msg_obj.GetContent():
+          if (isinstance(piece, types.StringTypes) and
+              not _WHITESPACE_ONLY.match(piece)):
+            is_translateable = True
+            break
+
+        # A matching marker in the comment overrides the check above.
+        if self.not_localizable_re.search(comment_text):
+          is_translateable = False
+
+        message_meaning = ''
+        internal_comment = ''
+
+        # A node with the same ID in the role model (an existing GRD file)
+        # supplies the description, meaning, internal comment and
+        # translateable flag.
+        role_node = None
+        if self.role_model:
+          role_node = self.role_model.GetNodeById(msg_id)
+        if role_node:
+          is_translateable = role_node.IsTranslateable()
+          message_meaning = role_node.attrs['meaning']
+          comment_text = role_node.attrs['desc']
+          internal_comment = role_node.attrs['internal_comment']
+
+        # Nontranslateable messages are stored as plain text, without the
+        # complexity of placeholders.
+        if not is_translateable:
+          msg_obj = tclib.Message(text=text)
+
+        msg_node = message.MessageNode.Construct(
+            node, msg_obj, msg_id,
+            desc=comment_text,
+            translateable=is_translateable,
+            meaning=message_meaning)
+        msg_node.attrs['internal_comment'] = internal_comment
+
+        node.AddChild(msg_node)
+        self.ExtraVerboseOut('Done processing message %s\n' % msg_id)
+
+
+  def Placeholderize(self, text):
+    '''Builds a tclib.Message from 'text', turning whatever can be recognized
+    automatically into placeholders: HTML code first and, if that yields no
+    placeholders, printf-style and FormatMessage-style format specifiers.
+
+    Any exception is reported together with the offending text and re-raised.
+    '''
+    try:
+      # HTML placeholderizing gets the first shot.
+      # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing
+      html_msg = tr_html.HtmlToMessage(text, True)
+      for piece in html_msg.GetContent():
+        if not isinstance(piece, types.StringTypes):
+          # At least one placeholder was produced, so the HTML result stands.
+          return html_msg
+
+      # No HTML placeholders: split on format specifiers instead. The pattern
+      # has one capturing group, so the specifiers show up in the split result
+      # between the pieces of ordinary text.
+      plain_msg = tclib.Message()
+      pieces = _FORMAT_SPECIFIER.split(text)
+      next_todo = 1  # Placeholder IDs are 'TODO_0001', 'TODO_0002', ...
+      for piece in pieces:
+        if piece == '':
+          continue
+        if _FORMAT_SPECIFIER.match(piece):
+          plain_msg.AppendPlaceholder(tclib.Placeholder(
+              'TODO_%04d' % next_todo, piece, 'TODO'))
+          next_todo += 1
+        else:
+          plain_msg.AppendText(piece)
+
+      # More than one piece means at least one TODO placeholder was created;
+      # an identical message in the role model, if any, replaces ours
+      # wholesale to get its placeholder names and examples.
+      if self.role_model and len(pieces) > 1:
+        model_msg = self.role_model.UberClique().BestCliqueByOriginalText(
+            plain_msg.GetRealContent(), '')
+        if model_msg:
+          return model_msg
+
+      return plain_msg
+    except:
+      print 'Exception processing message with text "%s"' % text
+      raise
+
Property changes on: grit/tool/rc2grd.py
___________________________________________________________________
Added: svn:eol-style
+ LF
« no previous file with comments | « grit/tool/preprocess_unittest.py ('k') | grit/tool/rc2grd_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698