OLD | NEW |
| (Empty) |
1 #!/usr/bin/env python | |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 """The 'grit xmb' tool. | |
7 """ | |
8 | |
9 import getopt | |
10 import os | |
11 | |
12 from xml.sax import saxutils | |
13 | |
14 from grit import grd_reader | |
15 from grit import lazy_re | |
16 from grit import tclib | |
17 from grit import util | |
18 from grit.tool import interface | |
19 | |
20 | |
21 # Used to collapse presentable content to determine if | |
22 # xml:space="preserve" is needed. | |
23 _WHITESPACES_REGEX = lazy_re.compile(ur'\s\s*') | |
24 | |
25 | |
26 # See XmlEscape below. | |
27 _XML_QUOTE_ESCAPES = { | |
28 u"'": u''', | |
29 u'"': u'"', | |
30 } | |
31 _XML_BAD_CHAR_REGEX = lazy_re.compile(u'[^\u0009\u000A\u000D' | |
32 u'\u0020-\uD7FF\uE000-\uFFFD]') | |
33 | |
34 | |
def _XmlEscape(s):
  """Returns text escaped for XML in a way compatible with Google's
  internal Translation Console tool.  May be used for attributes as
  well as for contents.

  Args:
    s: text to escape; anything that is not a unicode object is first
       converted with unicode().

  Returns:
    A UTF-8 encoded byte string in which &, < and > plus the characters
    listed in _XML_QUOTE_ESCAPES are replaced by entities, and from which
    every character matching _XML_BAD_CHAR_REGEX has been removed.
  """
  if not isinstance(s, unicode):
    s = unicode(s)
  escaped = saxutils.escape(s, _XML_QUOTE_ESCAPES)
  # Strip the disallowed characters only after escaping, then encode once at
  # the output boundary.
  return _XML_BAD_CHAR_REGEX.sub(u'', escaped).encode('utf-8')
44 | |
45 | |
def _WriteAttribute(file, name, value):
  """Emits a single XML attribute, preceded by a space, to the given file.

  Nothing at all is written when the value is falsy (e.g. None or an empty
  string).

  Args:
    file: file to write to
    name: name of the attribute
    value: (unescaped) value of the attribute
  """
  if not value:
    return
  escaped_value = _XmlEscape(value)
  file.write(' %s="%s"' % (name, escaped_value))
56 | |
57 | |
def _WriteMessage(file, message):
  """Writes one message to the specified file as a <msg> element.

  The 'desc', 'id' and 'meaning' attributes are taken from the message.
  xml:space="preserve" is added when stripping the presentable content and
  collapsing its whitespace runs to single spaces would change it; otherwise
  the content is placed on its own line inside the element.  Placeholder
  parts become <ph> elements holding an <ex> example followed by the
  original text; all other parts are written as escaped text.

  Args:
    file: file to write to
    message: the message to write; it is queried through
        GetPresentableContent(), GetDescription(), GetId(), GetMeaning(),
        GetContent() and its 'parts' attribute.
  """
  presentable_content = message.GetPresentableContent()
  # The second alternative used to read type(parts[0] == tclib.Placeholder),
  # i.e. type() of a bool, which is always truthy; test the part itself.
  assert (isinstance(presentable_content, unicode) or
          (len(message.parts) == 1 and
           isinstance(message.parts[0], tclib.Placeholder)))
  collapsed_content = _WHITESPACES_REGEX.sub(u' ', presentable_content.strip())
  preserve_space = presentable_content != collapsed_content

  file.write('<msg')
  _WriteAttribute(file, 'desc', message.GetDescription())
  _WriteAttribute(file, 'id', message.GetId())
  _WriteAttribute(file, 'meaning', message.GetMeaning())
  if preserve_space:
    _WriteAttribute(file, 'xml:space', 'preserve')
  file.write('>')
  if not preserve_space:
    file.write('\n ')

  for part in message.GetContent():
    if isinstance(part, tclib.Placeholder):
      file.write('<ph')
      _WriteAttribute(file, 'name', part.GetPresentation())
      file.write('><ex>')
      file.write(_XmlEscape(part.GetExample()))
      file.write('</ex>')
      file.write(_XmlEscape(part.GetOriginal()))
      file.write('</ph>')
    else:
      file.write(_XmlEscape(part))
  if not preserve_space:
    file.write('\n')
  file.write('</msg>\n')
91 | |
92 | |
def WriteXmbFile(file, messages):
  """Serializes grit.tclib.Message items as an XMB document.

  The XML declaration, the inline DTD and the opening <messagebundle> tag are
  written first, then one <msg> element per message in the order given, and
  finally the closing tag (without a trailing newline).

  Args:
    file: open file-like object to write to
    messages: iterable of grit.tclib.Message items
  """
  xmb_header = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE messagebundle [
<!ELEMENT messagebundle (msg)*>
<!ATTLIST messagebundle class CDATA #IMPLIED>

<!ELEMENT msg (#PCDATA|ph|source)*>
<!ATTLIST msg id CDATA #IMPLIED>
<!ATTLIST msg seq CDATA #IMPLIED>
<!ATTLIST msg name CDATA #IMPLIED>
<!ATTLIST msg desc CDATA #IMPLIED>
<!ATTLIST msg meaning CDATA #IMPLIED>
<!ATTLIST msg obsolete (obsolete) #IMPLIED>
<!ATTLIST msg xml:space (default|preserve) "default">
<!ATTLIST msg is_hidden CDATA #IMPLIED>

<!ELEMENT source (#PCDATA)>

<!ELEMENT ph (#PCDATA|ex)*>
<!ATTLIST ph name CDATA #REQUIRED>

<!ELEMENT ex (#PCDATA)>
]>
<messagebundle>
"""
  xmb_footer = '</messagebundle>'

  file.write(xmb_header)
  for msg in messages:
    _WriteMessage(file, msg)
  file.write(xmb_footer)
124 | |
125 | |
class OutputXmb(interface.Tool):
  """Outputs all translateable messages in the .grd input file to an
.xmb file, which is the format used to give source messages to
Google's internal Translation Console tool. The format could easily
be used for other systems.

Usage: grit xmb [-i|-h] [-l LIMITFILE] OUTPUTPATH

OUTPUTPATH is the path you want to output the .xmb file to.

The -l option can be used to output only some of the resources to the .xmb file.
LIMITFILE is the path to a file that is used to limit the items output to the
xmb file. If the filename extension is .grd, the file must be a .grd file
and the tool only outputs the contents of nodes from the input file that also
exist in the limit file (as compared on the 'name' attribute). Otherwise it must
contain a list of the IDs that output should be limited to, one ID per line, and
the tool will only output nodes with 'name' attributes that match one of the
IDs.

The -i option causes 'grit xmb' to output an "IDs only" file instead of an XMB
file. The "IDs only" file contains the message ID of each message that would
normally be output to the XMB file, one message ID per line. It is designed for
use with the 'grit transl2tc' tool's -l option.

Other options:

  -D NAME[=VAL]     Specify a C-preprocessor-like define NAME with optional
                    value VAL (defaults to 1) which will be used to control
                    conditional inclusion of resources.

  -E NAME=VALUE     Set environment variable NAME to VALUE (within grit).

"""
  # The different output formats supported by this tool
  FORMAT_XMB = 0
  FORMAT_IDS_ONLY = 1

  def __init__(self, defines=None):
    """Creates the tool in FORMAT_XMB mode.

    Args:
      defines: optional dict of defines; -D options parsed by Run() are added
          to it.  An empty dict is used when omitted or falsy.
    """
    super(OutputXmb, self).__init__()
    self.format = self.FORMAT_XMB
    self.defines = defines or {}

  def ShortDescription(self):
    """Returns a one-line description of this tool."""
    return 'Exports all translateable messages into an XMB file.'

  def Run(self, opts, args):
    """Parses this tool's options, reads the input .grd and writes the output.

    Args:
      opts: the global options; opts.input and opts.extra_verbose are read
          here and the object is passed on to SetOptions().
      args: the arguments following the tool name, i.e. this tool's own
          options followed by OUTPUTPATH.

    Returns:
      2 when the number of positional arguments is not exactly one, None
      otherwise.

    Raises:
      getopt.GetoptError: if args contains an option this tool does not know.
    """
    self.SetOptions(opts)

    limit_file = None
    limit_is_grd = False
    limit_file_dir = None
    # 'E:' has to be part of the spec: without it getopt rejects the
    # documented -E option and the branch below can never run.  -h is
    # accepted by the spec but has no handler in this method.
    own_opts, args = getopt.getopt(args, 'l:D:E:ih')
    try:
      for key, val in own_opts:
        if key == '-l':
          if limit_file:
            # A repeated -l replaces the earlier limit file; do not leak it.
            limit_file.close()
          limit_file = open(val, 'r')
          limit_file_dir = util.dirname(val) or '.'
          limit_is_grd = os.path.splitext(val)[1] == '.grd'
        elif key == '-i':
          self.format = self.FORMAT_IDS_ONLY
        elif key == '-D':
          name, val = util.ParseDefine(val)
          self.defines[name] = val
        elif key == '-E':
          env_name, env_value = val.split('=', 1)
          os.environ[env_name] = env_value
      if len(args) != 1:
        print ('grit xmb takes exactly one argument, the path to the XMB file '
               'to output.')
        return 2

      xmb_path = args[0]
      res_tree = grd_reader.Parse(opts.input, debug=opts.extra_verbose)
      res_tree.SetOutputLanguage('en')
      res_tree.SetDefines(self.defines)
      res_tree.OnlyTheseTranslations([])
      res_tree.RunGatherers()

      with open(xmb_path, 'wb') as output_file:
        self.Process(
            res_tree, output_file, limit_file, limit_is_grd, limit_file_dir)
    finally:
      # Runs on the early return and on errors too, so the limit file is
      # always released.
      if limit_file:
        limit_file.close()
    print('Wrote %s' % xmb_path)

  def Process(self, res_tree, output_file, limit_file=None, limit_is_grd=False,
              dir=None):
    """Writes a document with the contents of res_tree into output_file,
    limiting output to the IDs specified in limit_file, which is a GRD file if
    limit_is_grd is true, otherwise a file with one ID per line.

    The format of the output document depends on this object's format attribute.
    It can be FORMAT_XMB or FORMAT_IDS_ONLY.

    The FORMAT_IDS_ONLY format causes this function to write just a list
    of the IDs of all messages that would have been added to the XMB file, one
    ID per line.

    The FORMAT_XMB format causes this function to output the (default) XMB
    format.

    Args:
      res_tree: base.Node()
      output_file: file open for writing
      limit_file: None or file open for reading
      limit_is_grd: True | False
      dir: Directory of the limit file
    """
    # Names that nodes must carry to be output; None means "no limit".  A set
    # keeps the per-node membership test cheap.
    limit_names = None
    if limit_file:
      if limit_is_grd:
        limit_tree = grd_reader.Parse(limit_file,
                                      dir=dir,
                                      debug=self.o.extra_verbose)
        limit_names = set(node.attrs['name'] for node in limit_tree
                          if 'name' in node.attrs)
      else:
        # Not a GRD file, so it's just a file with one ID per line
        limit_names = set(
            item.strip() for item in limit_file.read().split('\n'))

    seen_ids = set()
    messages = []
    for node in res_tree:
      if (limit_names is not None and
          not ('name' in node.attrs and node.attrs['name'] in limit_names)):
        continue
      if not node.IsTranslateable():
        continue

      for clique in node.GetCliques():
        if not clique.IsTranslateable():
          continue
        if not clique.GetMessage().GetRealContent():
          continue

        # Some explanation is in order here.  Note that we can have
        # many messages with the same ID.
        #
        # The way we work around this is to maintain a list of cliques
        # per message ID (in the UberClique) and select the "best" one
        # (the first one that has a description, or an arbitrary one
        # if there is no description) for inclusion in the XMB file.
        # The translations are all going to be the same for messages
        # with the same ID, although the way we replace placeholders
        # might be slightly different.
        message_id = clique.GetMessage().GetId()
        if message_id in seen_ids:
          continue
        seen_ids.add(message_id)

        messages.append(node.UberClique().BestClique(message_id).GetMessage())

    # Ensure a stable order of messages, to help regression testing.
    messages.sort(key=lambda msg: msg.GetId())

    if self.format == self.FORMAT_IDS_ONLY:
      # We just print the list of IDs to the output file.
      for msg in messages:
        output_file.write(msg.GetId())
        output_file.write('\n')
    else:
      assert self.format == self.FORMAT_XMB
      WriteXmbFile(output_file, messages)
OLD | NEW |