grit/tool/xmb.py - Issue 1442863002: Remove contents of grit's SVN repository.

Side by Side Diff: grit/tool/xmb.py

Issue 1442863002: Remove contents of grit's SVN repository. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/

Patch Set: Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 #!/usr/bin/env python

2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.

5

6 """The 'grit xmb' tool.

7 """

8

9 import getopt

10 import os

11

12 from xml.sax import saxutils

13

14 from grit import grd_reader

15 from grit import lazy_re

16 from grit import tclib

17 from grit import util

18 from grit.tool import interface

19

20

21 # Used to collapse presentable content to determine if

22 # xml:space="preserve" is needed.

23 _WHITESPACES_REGEX = lazy_re.compile(ur'\s\s*')

24

25

26 # See XmlEscape below.

27 _XML_QUOTE_ESCAPES = {

28 u"'": u''',

29 u'"': u'"',

30 }

31 _XML_BAD_CHAR_REGEX = lazy_re.compile(u'[^\u0009\u000A\u000D'

32 u'\u0020-\uD7FF\uE000-\uFFFD]')

33

34

def _XmlEscape(s):
  """Returns text escaped for XML in a way compatible with Google's
  internal Translation Console tool.  May be used for attributes as
  well as for contents.

  Args:
    s: value to escape.  Anything that is not already a unicode string is
       converted with unicode() first.

  Returns:
    A UTF-8 encoded byte string in which '&', '<', '>' and the characters
    listed in _XML_QUOTE_ESCAPES have been replaced by entities, and from
    which every character matched by _XML_BAD_CHAR_REGEX has been removed.
  """
  # isinstance() rather than an exact type comparison, so that unicode
  # subclasses are accepted as-is instead of being copied.
  if not isinstance(s, unicode):
    s = unicode(s)
  result = saxutils.escape(s, _XML_QUOTE_ESCAPES)
  # Bad characters are stripped after escaping, then the text is encoded once
  # at the output boundary.
  return _XML_BAD_CHAR_REGEX.sub(u'', result).encode('utf-8')

44

45

46 def _WriteAttribute(file, name, value):

47 """Writes an XML attribute to the specified file.

48

49 Args:

50 file: file to write to

51 name: name of the attribute

52 value: (unescaped) value of the attribute

53 """

54 if value:

55 file.write(' %s="%s"' % (name, _XmlEscape(value)))

56

57

def _WriteMessage(file, message):
  """Writes one <msg> element describing the given message to a file.

  The element carries the 'desc', 'id' and 'meaning' attributes (each one
  only when its value is non-empty) and, when needed, xml:space="preserve".
  Placeholder parts are written as <ph name="..."><ex>example</ex>original</ph>;
  all other parts are written as escaped text.

  Args:
    file: file-like object to write to
    message: the message to write.  It must provide GetPresentableContent(),
             GetDescription(), GetId(), GetMeaning(), GetContent() and a
             'parts' sequence.
  """
  presentable_content = message.GetPresentableContent()
  # The previous form of this check, type(parts[0] == tclib.Placeholder), took
  # the type of a comparison result and so was always true.  The intent is:
  # the content is unicode, or the message is a single placeholder.
  assert (isinstance(presentable_content, unicode) or
          (len(message.parts) == 1 and
           isinstance(message.parts[0], tclib.Placeholder)))
  # Whitespace must be preserved whenever stripping the content and collapsing
  # runs of whitespace to a single space would change it.
  preserve_space = presentable_content != _WHITESPACES_REGEX.sub(
      u' ', presentable_content.strip())

  file.write('<msg')
  _WriteAttribute(file, 'desc', message.GetDescription())
  _WriteAttribute(file, 'id', message.GetId())
  _WriteAttribute(file, 'meaning', message.GetMeaning())
  if preserve_space:
    _WriteAttribute(file, 'xml:space', 'preserve')
  file.write('>')
  # Cosmetic line breaks are only added when whitespace is not significant.
  if not preserve_space:
    file.write('\n ')

  parts = message.GetContent()
  for part in parts:
    if isinstance(part, tclib.Placeholder):
      file.write('<ph')
      _WriteAttribute(file, 'name', part.GetPresentation())
      file.write('><ex>')
      file.write(_XmlEscape(part.GetExample()))
      file.write('</ex>')
      file.write(_XmlEscape(part.GetOriginal()))
      file.write('</ph>')
    else:
      file.write(_XmlEscape(part))
  if not preserve_space:
    file.write('\n')
  file.write('</msg>\n')

91

92

def WriteXmbFile(file, messages):
  """Writes the given grit.tclib.Message items to the specified open
  file-like object in the XMB format.

  The output consists of the XML declaration, an inline DTD for the
  <messagebundle> document type, one <msg> element per message (in the order
  given) and the closing </messagebundle> tag.  No newline is written after
  the closing tag.

  Args:
    file: file-like object to write to
    messages: iterable of messages; each one is written by _WriteMessage
  """
  # The alternations in the DTD content models use a plain '|'.  A backslash
  # in front of it would be emitted literally and make the DTD invalid.
  file.write("""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE messagebundle [
<!ELEMENT messagebundle (msg)*>
<!ATTLIST messagebundle class CDATA #IMPLIED>

<!ELEMENT msg (#PCDATA|ph|source)*>
<!ATTLIST msg id CDATA #IMPLIED>
<!ATTLIST msg seq CDATA #IMPLIED>
<!ATTLIST msg name CDATA #IMPLIED>
<!ATTLIST msg desc CDATA #IMPLIED>
<!ATTLIST msg meaning CDATA #IMPLIED>
<!ATTLIST msg obsolete (obsolete) #IMPLIED>
<!ATTLIST msg xml:space (default|preserve) "default">
<!ATTLIST msg is_hidden CDATA #IMPLIED>

<!ELEMENT source (#PCDATA)>

<!ELEMENT ph (#PCDATA|ex)*>
<!ATTLIST ph name CDATA #REQUIRED>

<!ELEMENT ex (#PCDATA)>
]>
<messagebundle>
""")
  for message in messages:
    _WriteMessage(file, message)
  file.write('</messagebundle>')

124

125

class OutputXmb(interface.Tool):
  """Outputs all translateable messages in the .grd input file to an
.xmb file, which is the format used to give source messages to
Google's internal Translation Console tool.  The format could easily
be used for other systems.

Usage: grit xmb [-i|-h] [-l LIMITFILE] OUTPUTPATH

OUTPUTPATH is the path you want to output the .xmb file to.

The -l option can be used to output only some of the resources to the .xmb file.
LIMITFILE is the path to a file that is used to limit the items output to the
xmb file.  If the filename extension is .grd, the file must be a .grd file
and the tool only output the contents of nodes from the input file that also
exist in the limit file (as compared on the 'name' attribute). Otherwise it must
contain a list of the IDs that output should be limited to, one ID per line, and
the tool will only output nodes with 'name' attributes that match one of the
IDs.

The -i option causes 'grit xmb' to output an "IDs only" file instead of an XMB
file.  The "IDs only" file contains the message ID of each message that would
normally be output to the XMB file, one message ID per line.  It is designed for
use with the 'grit transl2tc' tool's -l option.

Other options:

  -D NAME[=VAL]     Specify a C-preprocessor-like define NAME with optional
                    value VAL (defaults to 1) which will be used to control
                    conditional inclusion of resources.

  -E NAME=VALUE     Set environment variable NAME to VALUE (within grit).

"""
  # The different output formats supported by this tool
  FORMAT_XMB = 0
  FORMAT_IDS_ONLY = 1

  def __init__(self, defines=None):
    """Creates the tool with XMB as the default output format.

    Args:
      defines: optional dict of defines; more entries may be added by -D
               options in Run().  An empty dict is used when omitted.
    """
    super(OutputXmb, self).__init__()
    self.format = self.FORMAT_XMB
    self.defines = defines or {}

  def ShortDescription(self):
    """Returns a one-line description of this tool."""
    return 'Exports all translateable messages into an XMB file.'

  def Run(self, opts, args):
    """Parses the tool's own options and writes the output file.

    Args:
      opts: global options object; 'input' and 'extra_verbose' are read here
      args: remaining command-line arguments: this tool's options followed by
            exactly one output path

    Returns:
      2 when the number of positional arguments is not exactly one, otherwise
      None.

    Raises:
      getopt.GetoptError: if an unknown option is given.
    """
    self.SetOptions(opts)

    limit_file = None
    limit_is_grd = False
    limit_file_dir = None
    try:
      # 'E:' must be listed here, otherwise getopt rejects the documented -E
      # option before the handler below can ever see it.
      own_opts, args = getopt.getopt(args, 'l:D:E:ih')
      for key, val in own_opts:
        if key == '-l':
          # Only the last -l wins; do not leak a file opened by an earlier one.
          if limit_file:
            limit_file.close()
          limit_file = open(val, 'r')
          limit_file_dir = util.dirname(val)
          if not limit_file_dir:
            limit_file_dir = '.'
          limit_is_grd = os.path.splitext(val)[1] == '.grd'
        elif key == '-i':
          self.format = self.FORMAT_IDS_ONLY
        elif key == '-D':
          name, val = util.ParseDefine(val)
          self.defines[name] = val
        elif key == '-E':
          (env_name, env_value) = val.split('=', 1)
          os.environ[env_name] = env_value
      if len(args) != 1:
        print ('grit xmb takes exactly one argument, the path to the XMB file '
               'to output.')
        return 2

      xmb_path = args[0]
      res_tree = grd_reader.Parse(opts.input, debug=opts.extra_verbose)
      res_tree.SetOutputLanguage('en')
      res_tree.SetDefines(self.defines)
      res_tree.OnlyTheseTranslations([])
      res_tree.RunGatherers()

      with open(xmb_path, 'wb') as output_file:
        self.Process(
            res_tree, output_file, limit_file, limit_is_grd, limit_file_dir)
    finally:
      # Close the limit file on every exit path, including the early return
      # above and any exception raised while parsing or writing.
      if limit_file:
        limit_file.close()
    print('Wrote %s' % xmb_path)

  def Process(self, res_tree, output_file, limit_file=None, limit_is_grd=False,
              dir=None):
    """Writes a document with the contents of res_tree into output_file,
    limiting output to the IDs specified in limit_file, which is a GRD file if
    limit_is_grd is true, otherwise a file with one ID per line.

    The format of the output document depends on this object's format attribute.
    It can be FORMAT_XMB or FORMAT_IDS_ONLY.

    The FORMAT_IDS_ONLY format causes this function to write just a list
    of the IDs of all messages that would have been added to the XMB file, one
    ID per line.

    The FORMAT_XMB format causes this function to output the (default) XMB
    format.

    Messages are written sorted by ID, and each ID is written at most once.

    Args:
      res_tree: base.Node()
      output_file: file open for writing
      limit_file: None or file open for reading
      limit_is_grd: True | False
      dir: Directory of the limit file
    """
    # Names that nodes are restricted to; only consulted when limit_file is
    # given.  A set keeps the per-node membership test cheap.
    limit_names = None
    if limit_file:
      if limit_is_grd:
        # NOTE(review): self.o is presumably set by SetOptions() -- confirm.
        limit_tree = grd_reader.Parse(limit_file,
                                      dir=dir,
                                      debug=self.o.extra_verbose)
        limit_names = set(node.attrs['name'] for node in limit_tree
                          if 'name' in node.attrs)
      else:
        # Not a GRD file, so it's just a file with one ID per line
        limit_names = set(item.strip()
                          for item in limit_file.read().split('\n'))

    ids_already_done = set()
    messages = []
    for node in res_tree:
      if (limit_file and
          not ('name' in node.attrs and node.attrs['name'] in limit_names)):
        continue
      if not node.IsTranslateable():
        continue

      for clique in node.GetCliques():
        if not clique.IsTranslateable():
          continue
        if not clique.GetMessage().GetRealContent():
          continue

        # Some explanation is in order here.  Note that we can have
        # many messages with the same ID.
        #
        # The way we work around this is to maintain a list of cliques
        # per message ID (in the UberClique) and select the "best" one
        # (the first one that has a description, or an arbitrary one
        # if there is no description) for inclusion in the XMB file.
        # The translations are all going to be the same for messages
        # with the same ID, although the way we replace placeholders
        # might be slightly different.
        message_id = clique.GetMessage().GetId()
        if message_id in ids_already_done:
          continue
        ids_already_done.add(message_id)

        messages.append(node.UberClique().BestClique(message_id).GetMessage())

    # Ensure a stable order of messages, to help regression testing.
    messages.sort(key=lambda x: x.GetId())

    if self.format == self.FORMAT_IDS_ONLY:
      # We just print the list of IDs to the output file.
      for msg in messages:
        output_file.write(msg.GetId())
        output_file.write('\n')
    else:
      assert self.format == self.FORMAT_XMB
      WriteXmbFile(output_file, messages)

OLD	NEW

« no previous file with comments | « grit/tool/unit.py ('k') | grit/tool/xmb_unittest.py » ('j') | no next file with comments »