grit/tool/rc2grd.py - Issue 1442863002: Remove contents of grit's SVN repository.

Side by Side Diff: grit/tool/rc2grd.py

Issue 1442863002: Remove contents of grit's SVN repository. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/

Patch Set: Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 #!/usr/bin/env python

2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.

5

6 '''The 'grit rc2grd' tool.'''

7

8

9 import os.path

10 import getopt

11 import re

12 import StringIO

13 import types

14

15 import grit.node.empty

16 from grit.node import include

17 from grit.node import structure

18 from grit.node import message

19

20 from grit.gather import rc

21 from grit.gather import tr_html

22

23 from grit.tool import interface

24 from grit.tool import postprocess_interface

25 from grit.tool import preprocess_interface

26

27 from grit import grd_reader

28 from grit import lazy_re

29 from grit import tclib

30 from grit import util

31

32

# All patterns below are written as raw strings so that regex escapes such as
# \s, \A, \. and \$ reach the regex engine exactly as written.

# Matches files referenced from an .rc file: an ID, a resource type and a
# double-quoted file name, all on one line.  The quoted part may end in a
# doubled quote ("").
_FILE_REF = lazy_re.compile(r'''
  ^(?P<id>[A-Z_0-9.]+)[ \t]+
  (?P<type>[A-Z_0-9]+)[ \t]+
  "(?P<file>.*?([^"]|""))"[ \t]*$''', re.VERBOSE | re.MULTILINE)


# Matches a dialog section (DIALOG or DIALOGEX), from its ID line through the
# first following line that consists of END.
_DIALOG = lazy_re.compile(
    r'^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s*$.+?^END\s*$',
    re.MULTILINE | re.DOTALL)


# Matches a menu section
_MENU = lazy_re.compile(r'^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s*$.+?^END\s*$',
                        re.MULTILINE | re.DOTALL)


# Matches a versioninfo section
_VERSIONINFO = lazy_re.compile(
    r'^(?P<id>[A-Z0-9_]+)\s+VERSIONINFO\s.+?^BEGIN\s*$.+?^END\s*$',
    re.MULTILINE | re.DOTALL)


# Matches a stringtable, including any number of PRELOAD / DISCARDABLE /
# CHARACTERISTICS / LANGUAGE / VERSION modifiers after the STRINGTABLE keyword.
# The text between BEGIN and END is captured as 'body'.
_STRING_TABLE = lazy_re.compile(
    (r'^STRINGTABLE(\s+(PRELOAD|DISCARDABLE|CHARACTERISTICS.+|LANGUAGE.+|'
     r'VERSION.+))*\s*\nBEGIN\s*$(?P<body>.+?)^END\s*$'),
    re.MULTILINE | re.DOTALL)


# Matches each message inside a stringtable, breaking it up into comments,
# the ID of the message, and the (RC-escaped) message text.
_MESSAGE = lazy_re.compile(r'''
  (?P<comment>(^\s+//.+?)*)  # 0 or more lines of comments preceding the message
  ^\s*
  (?P<id>[A-Za-z0-9_]+)  # id
  \s+
  "(?P<text>.*?([^"]|""))"([^"]|$)  # The message itself
  ''', re.MULTILINE | re.DOTALL | re.VERBOSE)


# Matches each line of comment text in a multi-line comment.
_COMMENT_TEXT = lazy_re.compile(r'^\s*//\s*(?P<text>.+?)$', re.MULTILINE)


# Matches a string that is empty or all whitespace
_WHITESPACE_ONLY = lazy_re.compile(r'\A\s*\Z', re.MULTILINE)


# Finds printf and FormatMessage style format specifiers
# Uses non-capturing groups except for the outermost group, so the output of
# re.split() should include both the normal text and what we intend to
# replace with placeholders.
# TODO(joi) Check documentation for printf (and Windows variants) and
# FormatMessage
_FORMAT_SPECIFIER = lazy_re.compile(
    r'(%[-# +]?(?:[0-9]*|\*)(?:\.(?:[0-9]+|\*))?(?:h|l|L)?'  # printf up to last char
    r'(?:d|i|o|u|x|X|e|E|f|F|g|G|c|r|s|ls|ws)'               # printf last char
    r'|\$[1-9][0-9]*)')                                      # FormatMessage

92

93

94 class Rc2Grd(interface.Tool):

95 '''A tool for converting .rc files to .grd files. This tool is only for

96 converting the source (nontranslated) .rc file to a .grd file. For importing

97 existing translations, use the rc2xtb tool.

98

99 Usage: grit [global options] rc2grd [OPTIONS] RCFILE

100

101 The tool takes a single argument, which is the path to the .rc file to convert.

102 It outputs a .grd file with the same name in the same directory as the .rc file.

103 The .grd file may have one or more TODO comments for things that have to be

104 cleaned up manually.

105

106 OPTIONS may be any of the following:

107

108 -e ENCODING Specify the ENCODING of the .rc file. Default is 'cp1252'.

109

110 -h TYPE Specify the TYPE attribute for HTML structures.

111 Default is 'tr_html'.

112

113 -u ENCODING Specify the ENCODING of HTML files. Default is 'utf-8'.

114

115 -n MATCH Specify the regular expression to match in comments that will

116 indicate that the resource the comment belongs to is not

117 translateable. Default is 'Not locali(s\|z)able'.

118

119 -r GRDFILE Specify that GRDFILE should be used as a "role model" for

120 any placeholders that otherwise would have had TODO names.

121 This attempts to find an identical message in the GRDFILE

122 and uses that instead of the automatically placeholderized

123 message.

124

125 --pre CLASS Specify an optional, fully qualified classname, which

126 has to be a subclass of grit.tool.PreProcessor, to

127 run on the text of the RC file before conversion occurs.

128 This can be used to support constructs in the RC files

129 that GRIT cannot handle on its own.

130

131 --post CLASS Specify an optional, fully qualified classname, which

132 has to be a subclass of grit.tool.PostProcessor, to

133 run on the text of the converted RC file.

134 This can be used to alter the content of the RC file

135 based on the conversion that occurred.

136

137 For menus, dialogs and version info, the .grd file will refer to the original

138 .rc file. Once conversion is complete, you can strip the original .rc file

139 of its string table and all comments as these will be available in the .grd

140 file.

141

142 Note that this tool WILL NOT obey C preprocessor rules, so even if something

143 is #if 0-ed out it will still be included in the output of this tool.

144 Therefore, if your .rc file contains sections like this, you should run the

145 C preprocessor on the .rc file or manually edit it before using this tool.

146 '''

147

def ShortDescription(self):
  '''Returns a one-line, human-readable summary of what this tool does.'''
  summary = 'A tool for converting .rc source files to .grd files.'
  return summary

150

151 def __init__(self):

152 self.input_encoding = 'cp1252'

153 self.html_type = 'tr_html'

154 self.html_encoding = 'utf-8'

155 self.not_localizable_re = re.compile('Not locali(s\|z)able')

156 self.role_model = None

157 self.pre_process = None

158 self.post_process = None

159

160 def ParseOptions(self, args):

161 '''Given a list of arguments, set this object's options and return

162 all non-option arguments.

163 '''

164 (own_opts, args) = getopt.getopt(args, 'e:h:u:n:r', ['pre=', 'post='])

165 for (key, val) in own_opts:

166 if key == '-e':

167 self.input_encoding = val

168 elif key == '-h':

169 self.html_type = val

170 elif key == '-u':

171 self.html_encoding = val

172 elif key == '-n':

173 self.not_localizable_re = re.compile(val)

174 elif key == '-r':

175 self.role_model = grd_reader.Parse(val)

176 elif key == '--pre':

177 self.pre_process = val

178 elif key == '--post':

179 self.post_process = val

180 return args

181

def Run(self, opts, args):
  '''Converts the .rc file named on the command line into a .grd file.

  The output path is util.dirname() of the input path joined with the
  input's base name, its extension replaced by '.grd'.  The text is written
  through util.WrapOutputStream(..., 'utf-8').

  Args:
    opts: global options, passed unchanged to self.SetOptions()
    args: tool-specific arguments (options followed by the .rc path)

  Return:
    2 if the number of non-option arguments is not exactly one.  On success
    the method falls off the end, i.e. returns None.
  '''
  remaining = self.ParseOptions(args)
  if len(remaining) != 1:
    print('This tool takes a single tool-specific argument, the path to the\n'
          '.rc file to process.')
    return 2
  self.SetOptions(opts)

  rc_path = remaining[0]
  stem = os.path.splitext(os.path.basename(rc_path))[0]
  grd_path = os.path.join(util.dirname(rc_path), stem + '.grd')

  rc_contents = util.ReadFile(rc_path, self.input_encoding)
  grd_contents = unicode(self.Process(rc_contents, rc_path))
  with util.WrapOutputStream(file(grd_path, 'w'), 'utf-8') as outfile:
    outfile.write(grd_contents)

  print('Wrote output file %s.\nPlease check for TODO items in the file.' %
        grd_path)

200

201

def Process(self, rctext, rc_path):
  '''Builds a .grd resource tree out of the text of an .rc file.

  Args:
    rctext: complete text of the rc file
    rc_path: path of the rc file, e.g. 'resource\\resource.rc'

  Return:
    The root node of the tree (a grit.node.base.Node subclass), or whatever
    the post-processing class returns when --post was given and the class
    could be instantiated.
  '''
  # Give the optional pre-processor a chance to rewrite the raw RC text.
  if self.pre_process:
    preprocessor = util.NewClassInstance(self.pre_process,
                                         preprocess_interface.PreProcessor)
    if not preprocessor:
      self.Out(
          'PreProcessing class could not be found. Skipping preprocessing.\n')
    else:
      rctext = preprocessor.Process(rctext, rc_path)

  # Start with a basic skeleton for the .grd file
  skeleton = (
      '<?xml version="1.0" encoding="UTF-8"?>\n'
      '<grit base_dir="." latest_public_release="0"\n'
      'current_release="1" source_lang_id="en">\n'
      '<outputs />\n'
      '<translations />\n'
      '<release seq="1">\n'
      '<includes />\n'
      '<structures />\n'
      '<messages />\n'
      '</release>\n'
      '</grit>')
  root = grd_reader.Parse(StringIO.StringIO(skeleton), util.dirname(rc_path))

  # The third child of <grit> is <release>; its children are, in order,
  # <includes>, <structures> and <messages>.
  release_node = root.children[2]
  includes = release_node.children[0]
  structures = release_node.children[1]
  messages = release_node.children[2]
  assert isinstance(includes, grit.node.empty.IncludesNode)
  assert isinstance(structures, grit.node.empty.StructuresNode)
  assert isinstance(messages, grit.node.empty.MessagesNode)

  # Fill the three containers from the RC text.
  self.AddIncludes(rctext, includes)
  self.AddStructures(rctext, structures, os.path.basename(rc_path))
  self.AddMessages(rctext, messages)

  self.VerboseOut('Validating that all IDs are unique...\n')
  root.ValidateUniqueIds()
  self.ExtraVerboseOut('Done validating that all IDs are unique.\n')

  # Give the optional post-processor a chance to replace the finished tree.
  if self.post_process:
    postprocessor = util.NewClassInstance(self.post_process,
                                          postprocess_interface.PostProcessor)
    if not postprocessor:
      self.Out(
          'PostProcessing class could not be found. Skipping postprocessing.\n')
    else:
      root = postprocessor.Process(rctext, rc_path, root)

  return root

260

261

def IsHtml(self, res_type, fname):
  '''Tells whether both the resource type and the file extension say HTML.

  The extension is the text after the last '.' in 'fname' (the whole name if
  it contains no '.'), compared case-insensitively against 'htm' and 'html'.
  The resource type must be exactly 'HTML'.
  '''
  extension = fname.rsplit('.', 1)[-1].lower()
  if res_type != 'HTML':
    return False
  return extension in ('htm', 'html')

266

267

def AddIncludes(self, rctext, node):
  '''Adds an <include> child to 'node' for every file reference in 'rctext'
  (e.g. BITMAP, ICON) that IsHtml() does not classify as HTML.'''
  for ref in _FILE_REF.finditer(rctext):
    res_id = ref.group('id')
    res_type = ref.group('type').upper()
    fname = rc.Section.UnEscape(ref.group('file'))
    # A file reference is expected to fit on a single line.
    assert fname.find('\n') == -1
    if self.IsHtml(res_type, fname):
      continue  # not added here
    self.VerboseOut('Processing %s with ID %s (filename: %s)\n' %
                    (res_type, res_id, fname))
    node.AddChild(
        include.IncludeNode.Construct(node, res_id, res_type, fname))

280

281

def AddStructures(self, rctext, node, rc_filename):
  '''Scans 'rctext' for structured resources (e.g. menus, dialogs, version
  information resources and HTML templates) and adds each as a <structure>
  child of 'node'.

  Args:
    rctext: complete text of the rc file
    node: node that receives the new <structure> children
    rc_filename: file name recorded on the menu, dialog and version
        structures
  '''
  # First add HTML includes.  The check has to use the resource type parsed
  # from the matched line; handing IsHtml() the builtin 'type' can never
  # compare equal to 'HTML', so this branch would never run.
  for m in _FILE_REF.finditer(rctext):
    res_id = m.group('id')
    res_type = m.group('type').upper()
    fname = rc.Section.UnEscape(m.group('file'))
    if self.IsHtml(res_type, fname):
      node.AddChild(structure.StructureNode.Construct(
          node, res_id, self.html_type, fname, self.html_encoding))

  # Then add all RC includes
  def AddStructure(struct_type, struct_id):
    '''Adds one <structure> of 'struct_type' that refers to the .rc file.'''
    self.VerboseOut('Processing %s with ID %s\n' % (struct_type, struct_id))
    node.AddChild(structure.StructureNode.Construct(
        node, struct_id, struct_type, rc_filename,
        encoding=self.input_encoding))

  for m in _MENU.finditer(rctext):
    AddStructure('menu', m.group('id'))
  for m in _DIALOG.finditer(rctext):
    AddStructure('dialog', m.group('id'))
  for m in _VERSIONINFO.finditer(rctext):
    AddStructure('version', m.group('id'))

307

308

def AddMessages(self, rctext, node):
  '''Adds a <message> child to 'node' for every string-table entry in 'rctext'.

  Each message text is first run through Placeholderize(), so $1/$2 or
  %s/%d style format specifiers and HTML markup become placeholders where
  that is possible.  The comment lines preceding a message become its
  description.
  '''
  for table_match in _STRING_TABLE.finditer(rctext):
    for msg_match in _MESSAGE.finditer(table_match.group('body')):
      # Collapse the preceding comment lines into one description string.
      description = ' '.join(
          line.group('text')
          for line in _COMMENT_TEXT.finditer(msg_match.group('comment')))

      msg_id = msg_match.group('id')
      raw_text = rc.Section.UnEscape(msg_match.group('text'))

      self.VerboseOut('Processing message %s (text: "%s")\n' %
                      (msg_id, raw_text))

      msg_obj = self.Placeholderize(raw_text)

      # Messages that contain only placeholders do not need translation:
      # at least one string part must hold something other than whitespace.
      translateable = any(
          isinstance(piece, types.StringTypes) and
          not _WHITESPACE_ONLY.match(piece)
          for piece in msg_obj.GetContent())

      # A comment that matches the "not localizable" pattern overrides that.
      if self.not_localizable_re.search(description):
        translateable = False

      meaning = ''
      internal_comment = ''

      # If we have a "role model" (existing GRD file) and this node exists
      # in the role model, use the description, meaning and translateable
      # attributes from the role model.
      if self.role_model:
        role_node = self.role_model.GetNodeById(msg_id)
        if role_node:
          translateable = role_node.IsTranslateable()
          meaning = role_node.attrs['meaning']
          description = role_node.attrs['desc']
          internal_comment = role_node.attrs['internal_comment']

      # For nontranslateable messages, we don't want the complexity of
      # placeholderizing everything.
      if not translateable:
        msg_obj = tclib.Message(text=raw_text)

      msg_node = message.MessageNode.Construct(
          node, msg_obj, msg_id,
          desc=description,
          translateable=translateable,
          meaning=meaning)
      msg_node.attrs['internal_comment'] = internal_comment

      node.AddChild(msg_node)
      self.ExtraVerboseOut('Done processing message %s\n' % msg_id)

368

369

def Placeholderize(self, text):
  '''Builds a tclib.Message from 'text', turning recognizable markup into
  placeholders.

  HTML placeholderizing is tried first.  If it yields no placeholder, printf-
  and FormatMessage-style format specifiers are replaced with placeholders
  named TODO_0001, TODO_0002, ...  When a role model is set and the text was
  split into more than one part, a matching message from the role model is
  used instead, if one is found.

  Any exception is reported on stdout together with the offending text and
  then re-raised.
  '''
  try:
    # First try HTML placeholderizing.
    # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing
    html_msg = tr_html.HtmlToMessage(text, True)
    if any(not isinstance(piece, types.StringTypes)
           for piece in html_msg.GetContent()):
      return html_msg  # Contained at least one placeholder, so we're done

    # HTML placeholderization didn't do anything, so try to find printf or
    # FormatMessage format specifiers and change them into placeholders.
    result = tclib.Message()
    pieces = _FORMAT_SPECIFIER.split(text)
    placeholder_count = 0  # Placeholder IDs are 'TODO_0001' etc.
    for piece in pieces:
      if _FORMAT_SPECIFIER.match(piece):
        placeholder_count += 1
        result.AppendPlaceholder(tclib.Placeholder(
            'TODO_%04d' % placeholder_count, piece, 'TODO'))
      elif piece != '':
        result.AppendText(piece)

    if self.role_model and len(pieces) > 1:  # there are TODO placeholders
      clique = self.role_model.UberClique().BestCliqueByOriginalText(
          result.GetRealContent(), '')
      if clique:
        # replace wholesale to get placeholder names and examples
        result = clique

    return result
  except:
    # Deliberately broad: the exception is only annotated here, never
    # swallowed.
    print('Exception processing message with text "%s"' % text)
    raise

409

OLD	NEW

« no previous file with comments | « grit/tool/preprocess_unittest.py ('k') | grit/tool/rc2grd_unittest.py » ('j') | no next file with comments »