grit/tool/rc2grd.py - Issue 7994004: Initial source commit to grit-i18n project.

Side by Side Diff: grit/tool/rc2grd.py

Issue 7994004: Initial source commit to grit-i18n project. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/

Patch Set: Created 9 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 #!/usr/bin/python2.4

	2 # Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6 '''The 'grit rc2grd' tool.'''

	7

	8

	9 import os.path

	10 import getopt

	11 import re

	12 import StringIO

	13 import types

	14

	15 import grit.node.empty

	16 from grit.node import include

	17 from grit.node import structure

	18 from grit.node import message

	19

	20 from grit.gather import rc

	21 from grit.gather import tr_html

	22

	23 from grit.tool import interface

	24 from grit.tool import postprocess_interface

	25 from grit.tool import preprocess_interface

	26

	27 from grit import grd_reader

	28 from grit import tclib

	29 from grit import util

	30

	31

	32 # Matches files referenced from an .rc file

	33 _FILE_REF = re.compile('''

	34 ^(?P<id>[A-Z_0-9.]+)[ \t]+

	35 (?P<type>[A-Z_0-9]+)[ \t]+

	36 "(?P<file>.?([^"]\|""))"[ \t]$''', re.VERBOSE \| re.MULTILINE)

	37

	38

	39 # Matches a dialog section

	40 _DIALOG = re.compile('^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s$.+?^END\s $',

	41 re.MULTILINE \| re.DOTALL)

	42

	43

	44 # Matches a menu section

	45 _MENU = re.compile('^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s$.+?^END\s$',

	46 re.MULTILINE \| re.DOTALL)

	47

	48

	49 # Matches a versioninfo section

	50 _VERSIONINFO = re.compile('^(?P<id>[A-Z0-9_]+)\s+VERSIONINFO\s.+?^BEGIN\s$.+?^E ND\s$',

	51 re.MULTILINE \| re.DOTALL)

	52

	53

	54 # Matches a stringtable

	55 _STRING_TABLE = re.compile('^STRINGTABLE(\s+(PRELOAD\|DISCARDABLE\|CHARACTERISTICS .+\|LANGUAGE.+\|VERSION.+))\s\nBEGIN\s$(?P<body>.+?)^END\s$',

	56 re.MULTILINE \| re.DOTALL)

	57

	58

	59 # Matches each message inside a stringtable, breaking it up into comments,

	60 # the ID of the message, and the (RC-escaped) message text.

	61 _MESSAGE = re.compile('''

	62 (?P<comment>(^\s+//.+?)*) # 0 or more lines of comments preceding the message

	63 ^\s*

	64 (?P<id>[A-Za-z0-9_]+) # id

	65 \s+

	66 "(?P<text>.*?([^"]\|""))"([^"]\|$) # The message itself

	67 ''', re.MULTILINE \| re.DOTALL \| re.VERBOSE)

	68

	69

	70 # Matches each line of comment text in a multi-line comment.

	71 _COMMENT_TEXT = re.compile('^\s//\s(?P<text>.+?)$', re.MULTILINE)

	72

	73

	74 # Matches a string that is empty or all whitespace

	75 _WHITESPACE_ONLY = re.compile('\A\s*\Z', re.MULTILINE)

	76

	77

	78 # Finds printf and FormatMessage style format specifiers

	79 # Uses non-capturing groups except for the outermost group, so the output of

	80 # re.split() should include both the normal text and what we intend to

	81 # replace with placeholders.

	82 # TODO(joi) Check documentation for printf (and Windows variants) and FormatMess age

	83 _FORMAT_SPECIFIER = re.compile(

	84 '(%[-# +]?(?:[0-9]\|\)(?:\.(?:[0-9]+\|\*))?(?:h\|l\|L)?' # printf up to last cha r

	85 '(?:d\|i\|o\|u\|x\|X\|e\|E\|f\|F\|g\|G\|c\|r\|s\|ls\|ws)' # printf last char

	86 '\|\$[1-9][0-9]*)') # FormatMessage

	87

	88

	89 class Rc2Grd(interface.Tool):

	90 '''A tool for converting .rc files to .grd files. This tool is only for

	91 converting the source (nontranslated) .rc file to a .grd file. For importing

	92 existing translations, use the rc2xtb tool.

	93

	94 Usage: grit [global options] rc2grd [OPTIONS] RCFILE

	95

	96 The tool takes a single argument, which is the path to the .rc file to convert.

	97 It outputs a .grd file with the same name in the same directory as the .rc file.

	98 The .grd file may have one or more TODO comments for things that have to be

	99 cleaned up manually.

	100

	101 OPTIONS may be any of the following:

	102

	103 -e ENCODING Specify the ENCODING of the .rc file. Default is 'cp1252'.

	104

	105 -h TYPE Specify the TYPE attribute for HTML structures.

	106 Default is 'tr_html'.

	107

	108 -u ENCODING Specify the ENCODING of HTML files. Default is 'utf-8'.

	109

	110 -n MATCH Specify the regular expression to match in comments that will

	111 indicate that the resource the comment belongs to is not

	112 translateable. Default is 'Not locali(s|z)able'.

	113

	114 -r GRDFILE Specify that GRDFILE should be used as a "role model" for

	115 any placeholders that otherwise would have had TODO names.

	116 This attempts to find an identical message in the GRDFILE

	117 and uses that instead of the automatically placeholderized

	118 message.

	119

	120 --pre CLASS Specify an optional, fully qualified classname, which

	121 has to be a subclass of grit.tool.PreProcessor, to

	122 run on the text of the RC file before conversion occurs.

	123 This can be used to support constructs in the RC files

	124 that GRIT cannot handle on its own.

	125

	126 --post CLASS Specify an optional, fully qualified classname, which

	127 has to be a subclass of grit.tool.PostProcessor, to

	128 run on the text of the converted RC file.

	129 This can be used to alter the content of the RC file

	130 based on the conversion that occurred.

	131

	132 For menus, dialogs and version info, the .grd file will refer to the original

	133 .rc file. Once conversion is complete, you can strip the original .rc file

	134 of its string table and all comments as these will be available in the .grd

	135 file.

	136

	137 Note that this tool WILL NOT obey C preprocessor rules, so even if something

	138 is #if 0-ed out it will still be included in the output of this tool.

	139 Therefore, if your .rc file contains sections like this, you should run the

	140 C preprocessor on the .rc file or manually edit it before using this tool.

	141 '''

	142

	143 def ShortDescription(self):

	144 return 'A tool for converting .rc source files to .grd files.'

	145

	146 def __init__(self):

	147 self.input_encoding = 'cp1252'

	148 self.html_type = 'tr_html'

	149 self.html_encoding = 'utf-8'

	150 self.not_localizable_re = re.compile('Not locali(s\|z)able')

	151 self.role_model = None

	152 self.pre_process = None

	153 self.post_process = None

	154

	155 def ParseOptions(self, args):

	156 '''Given a list of arguments, set this object's options and return

	157 all non-option arguments.

	158 '''

	159 (own_opts, args) = getopt.getopt(args, 'e:h:u:n:r', ['pre=', 'post='])

	160 for (key, val) in own_opts:

	161 if key == '-e':

	162 self.input_encoding = val

	163 elif key == '-h':

	164 self.html_type = val

	165 elif key == '-u':

	166 self.html_encoding = val

	167 elif key == '-n':

	168 self.not_localizable_re = re.compile(val)

	169 elif key == '-r':

	170 self.role_model = grd_reader.Parse(val)

	171 elif key == '--pre':

	172 self.pre_process = val

	173 elif key == '--post':

	174 self.post_process = val

	175 return args

	176

	177 def Run(self, opts, args):

	178 args = self.ParseOptions(args)

	179 if len(args) != 1:

	180 print ('This tool takes a single tool-specific argument, the path to the\n '

	181 '.rc file to process.')

	182 return 2

	183 self.SetOptions(opts)

	184

	185 path = args[0]

	186 out_path = os.path.join(util.dirname(path),

	187 os.path.splitext(os.path.basename(path))[0] + '.grd')

	188

	189 rcfile = util.WrapInputStream(file(path, 'r'), self.input_encoding)

	190 rctext = rcfile.read()

	191

	192 grd_text = unicode(self.Process(rctext, path))

	193

	194 rcfile.close()

	195

	196 outfile = util.WrapOutputStream(file(out_path, 'w'), 'utf-8')

	197 outfile.write(grd_text)

	198 outfile.close()

	199

	200 print 'Wrote output file %s.\nPlease check for TODO items in the file.' % ou t_path

	201

	202

	203 def Process(self, rctext, rc_path):

	204 '''Processes 'rctext' and returns a resource tree corresponding to it.

	205

	206 Args:

	207 rctext: complete text of the rc file

	208 rc_path: 'resource\resource.rc'

	209

	210 Return:

	211 grit.node.base.Node subclass

	212 '''

	213

	214 if self.pre_process:

	215 preprocess_class = util.NewClassInstance(self.pre_process,

	216 preprocess_interface.PreProcessor )

	217 if preprocess_class:

	218 rctext = preprocess_class.Process(rctext, rc_path)

	219 else:

	220 self.Out(

	221 'PreProcessing class could not be found. Skipping preprocessing.\n')

	222

	223 # Start with a basic skeleton for the .grd file

	224 root = grd_reader.Parse(StringIO.StringIO(

	225 '''<?xml version="1.0" encoding="UTF-8"?>

	226 <grit base_dir="." latest_public_release="0"

	227 current_release="1" source_lang_id="en">

	228 <outputs />

	229 <translations />

	230 <release seq="1">

	231 <includes />

	232 <structures />

	233 <messages />

	234 </release>

	235 </grit>'''), util.dirname(rc_path))

	236 includes = root.children[2].children[0]

	237 structures = root.children[2].children[1]

	238 messages = root.children[2].children[2]

	239 assert (isinstance(includes, grit.node.empty.IncludesNode) and

	240 isinstance(structures, grit.node.empty.StructuresNode) and

	241 isinstance(messages, grit.node.empty.MessagesNode))

	242

	243 self.AddIncludes(rctext, includes)

	244 self.AddStructures(rctext, structures, os.path.basename(rc_path))

	245 self.AddMessages(rctext, messages)

	246

	247 self.VerboseOut('Validating that all IDs are unique...\n')

	248 root.ValidateUniqueIds()

	249 self.ExtraVerboseOut('Done validating that all IDs are unique.\n')

	250

	251 if self.post_process:

	252 postprocess_class = util.NewClassInstance(self.post_process,

	253 postprocess_interface.PostProces sor)

	254 if postprocess_class:

	255 root = postprocess_class.Process(rctext, rc_path, root)

	256 else:

	257 self.Out(

	258 'PostProcessing class could not be found. Skipping postprocessing.\n')

	259

	260 return root

	261

	262

	263 def AddIncludes(self, rctext, node):

	264 '''Scans 'rctext' for included resources (e.g. BITMAP, ICON) and

	265 adds each included resource as an <include> child node of 'node'.'''

	266 for m in _FILE_REF.finditer(rctext):

	267 id = m.group('id')

	268 type = m.group('type').upper()

	269 fname = rc.Section.UnEscape(m.group('file'))

	270 assert fname.find('\n') == -1

	271 if type != 'HTML':

	272 self.VerboseOut('Processing %s with ID %s (filename: %s)\n' % (type, id, fname))

	273 node.AddChild(include.IncludeNode.Construct(node, id, type, fname))

	274

	275

	276 def AddStructures(self, rctext, node, rc_filename):

	277 '''Scans 'rctext' for structured resources (e.g. menus, dialogs, version

	278 information resources and HTML templates) and adds each as a <structure>

	279 child of 'node'.'''

	280 # First add HTML includes

	281 for m in _FILE_REF.finditer(rctext):

	282 id = m.group('id')

	283 type = m.group('type').upper()

	284 fname = rc.Section.UnEscape(m.group('file'))

	285 if type == 'HTML':

	286 node.AddChild(structure.StructureNode.Construct(

	287 node, id, self.html_type, fname, self.html_encoding))

	288

	289 # Then add all RC includes

	290 def AddStructure(type, id):

	291 self.VerboseOut('Processing %s with ID %s\n' % (type, id))

	292 node.AddChild(structure.StructureNode.Construct(node, id, type,

	293 rc_filename,

	294 encoding=self.input_encodi ng))

	295 for m in _MENU.finditer(rctext):

	296 AddStructure('menu', m.group('id'))

	297 for m in _DIALOG.finditer(rctext):

	298 AddStructure('dialog', m.group('id'))

	299 for m in _VERSIONINFO.finditer(rctext):

	300 AddStructure('version', m.group('id'))

	301

	302

	303 def AddMessages(self, rctext, node):

	304 '''Scans 'rctext' for all messages in string tables, preprocesses them as

	305 much as possible for placeholders (e.g. messages containing $1, $2 or %s, %d

	306 type format specifiers get those specifiers replaced with placeholders, and

	307 HTML-formatted messages get run through the HTML-placeholderizer). Adds

	308 each message as a <message> node child of 'node'.'''

	309 for tm in _STRING_TABLE.finditer(rctext):

	310 table = tm.group('body')

	311 for mm in _MESSAGE.finditer(table):

	312 comment_block = mm.group('comment')

	313 comment_text = []

	314 for cm in _COMMENT_TEXT.finditer(comment_block):

	315 comment_text.append(cm.group('text'))

	316 comment_text = ' '.join(comment_text)

	317

	318 id = mm.group('id')

	319 text = rc.Section.UnEscape(mm.group('text'))

	320

	321 self.VerboseOut('Processing message %s (text: "%s")\n' % (id, text))

	322

	323 msg_obj = self.Placeholderize(text)

	324

	325 # Messages that contain only placeholders do not need translation.

	326 is_translateable = False

	327 for item in msg_obj.GetContent():

	328 if isinstance(item, types.StringTypes):

	329 if not _WHITESPACE_ONLY.match(item):

	330 is_translateable = True

	331

	332 if self.not_localizable_re.search(comment_text):

	333 is_translateable = False

	334

	335 message_meaning = ''

	336 internal_comment = ''

	337

	338 # If we have a "role model" (existing GRD file) and this node exists

	339 # in the role model, use the description, meaning and translateable

	340 # attributes from the role model.

	341 if self.role_model:

	342 role_node = self.role_model.GetNodeById(id)

	343 if role_node:

	344 is_translateable = role_node.IsTranslateable()

	345 message_meaning = role_node.attrs['meaning']

	346 comment_text = role_node.attrs['desc']

	347 internal_comment = role_node.attrs['internal_comment']

	348

	349 # For nontranslateable messages, we don't want the complexity of

	350 # placeholderizing everything.

	351 if not is_translateable:

	352 msg_obj = tclib.Message(text=text)

	353

	354 msg_node = message.MessageNode.Construct(node, msg_obj, id,

	355 desc=comment_text,

	356 translateable=is_translateable,

	357 meaning=message_meaning)

	358 msg_node.attrs['internal_comment'] = internal_comment

	359

	360 node.AddChild(msg_node)

	361 self.ExtraVerboseOut('Done processing message %s\n' % id)

	362

	363

	364 def Placeholderize(self, text):

	365 '''Creates a tclib.Message object from 'text', attempting to recognize

	366 a few different formats of text that can be automatically placeholderized

	367 (HTML code, printf-style format strings, and FormatMessage-style format

	368 strings).

	369 '''

	370

	371 try:

	372 # First try HTML placeholderizing.

	373 # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing

	374 msg = tr_html.HtmlToMessage(text, True)

	375 for item in msg.GetContent():

	376 if not isinstance(item, types.StringTypes):

	377 return msg # Contained at least one placeholder, so we're done

	378

	379 # HTML placeholderization didn't do anything, so try to find printf or

	380 # FormatMessage format specifiers and change them into placeholders.

	381 msg = tclib.Message()

	382 parts = _FORMAT_SPECIFIER.split(text)

	383 todo_counter = 1 # We make placeholder IDs 'TODO_0001' etc.

	384 for part in parts:

	385 if _FORMAT_SPECIFIER.match(part):

	386 msg.AppendPlaceholder(tclib.Placeholder(

	387 'TODO_%04d' % todo_counter, part, 'TODO'))

	388 todo_counter += 1

	389 elif part != '':

	390 msg.AppendText(part)

	391

	392 if self.role_model and len(parts) > 1: # there are TODO placeholders

	393 role_model_msg = self.role_model.UberClique().BestCliqueByOriginalText(

	394 msg.GetRealContent(), '')

	395 if role_model_msg:

	396 # replace wholesale to get placeholder names and examples

	397 msg = role_model_msg

	398

	399 return msg

	400 except:

	401 print 'Exception processing message with text "%s"' % text

	402 raise

	403

OLD	NEW

« no previous file with comments | « grit/tool/preprocess_unittest.py ('k') | grit/tool/rc2grd_unittest.py » ('j') | no next file with comments »