| OLD | NEW |
| (Empty) | |
| 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 """The 'grit xmb' tool. |
| 7 """ |
| 8 |
| 9 import getopt |
| 10 import os |
| 11 |
| 12 from xml.sax import saxutils |
| 13 |
| 14 from grit import grd_reader |
| 15 from grit import lazy_re |
| 16 from grit import tclib |
| 17 from grit import util |
| 18 from grit.tool import interface |
| 19 |
| 20 |
| 21 # Used to collapse presentable content to determine if |
| 22 # xml:space="preserve" is needed. |
| 23 _WHITESPACES_REGEX = lazy_re.compile(ur'\s\s*') |
| 24 |
| 25 |
| 26 # See XmlEscape below. |
| 27 _XML_QUOTE_ESCAPES = { |
| 28 u"'": u''', |
| 29 u'"': u'"', |
| 30 } |
| 31 _XML_BAD_CHAR_REGEX = lazy_re.compile(u'[^\u0009\u000A\u000D' |
| 32 u'\u0020-\uD7FF\uE000-\uFFFD]') |
| 33 |
| 34 |
def _XmlEscape(s):
  """Escapes text for XML output, compatibly with Google's internal
  Translation Console tool.

  The result is suitable for attribute values as well as element content.

  Args:
    s: the text to escape; anything that is not already a unicode object is
       converted with unicode() first

  Returns:
    The escaped text, with characters matched by _XML_BAD_CHAR_REGEX removed,
    encoded as a UTF-8 byte string.
  """
  text = s if type(s) == unicode else unicode(s)
  escaped = saxutils.escape(text, _XML_QUOTE_ESCAPES)
  cleaned = _XML_BAD_CHAR_REGEX.sub(u'', escaped)
  return cleaned.encode('utf-8')
| 44 |
| 45 |
def _WriteAttribute(file, name, value):
  """Emits ' name="value"' to the given file, escaping the value.

  Nothing at all is written when the value is empty or otherwise falsy.

  Args:
    file: file to write to
    name: name of the attribute
    value: (unescaped) value of the attribute
  """
  if not value:
    return
  attribute = ' %s="%s"' % (name, _XmlEscape(value))
  file.write(attribute)
| 56 |
| 57 |
def _WriteMessage(file, message):
  """Writes a single message to the file as an XMB <msg> element.

  The element carries the message's description, ID and meaning as
  attributes (each omitted when empty).  Placeholders are written as
  <ph name="..."><ex>EXAMPLE</ex>ORIGINAL</ph>; all other parts are written
  as escaped text.

  Args:
    file: file-like object to write to
    message: the message to write (a grit.tclib.Message, per WriteXmbFile)
  """
  presentable_content = message.GetPresentableContent()
  # The presentable content must be unicode, unless the message consists of
  # nothing but a single placeholder.
  assert (isinstance(presentable_content, unicode) or
          (len(message.parts) == 1 and
           isinstance(message.parts[0], tclib.Placeholder)))

  # If stripping the content and collapsing whitespace runs to one space
  # changes it, the whitespace is significant and must be preserved verbatim.
  preserve_space = presentable_content != _WHITESPACES_REGEX.sub(
      u' ', presentable_content.strip())

  file.write('<msg')
  _WriteAttribute(file, 'desc', message.GetDescription())
  _WriteAttribute(file, 'id', message.GetId())
  _WriteAttribute(file, 'meaning', message.GetMeaning())
  if preserve_space:
    _WriteAttribute(file, 'xml:space', 'preserve')
  file.write('>')
  # Cosmetic line breaks are only added when whitespace is not significant.
  if not preserve_space:
    file.write('\n ')

  for part in message.GetContent():
    if isinstance(part, tclib.Placeholder):
      file.write('<ph')
      _WriteAttribute(file, 'name', part.GetPresentation())
      file.write('><ex>')
      file.write(_XmlEscape(part.GetExample()))
      file.write('</ex>')
      file.write(_XmlEscape(part.GetOriginal()))
      file.write('</ph>')
    else:
      file.write(_XmlEscape(part))
  if not preserve_space:
    file.write('\n')
  file.write('</msg>\n')
| 91 |
| 92 |
def WriteXmbFile(file, messages):
  """Emits an XMB document containing the given messages.

  The document consists of the XML declaration and inline DTD, the opening
  <messagebundle> tag, one <msg> element per message (in the order given),
  and the closing tag.

  Args:
    file: open file-like object to write to
    messages: iterable of grit.tclib.Message items
  """
  header = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE messagebundle [
<!ELEMENT messagebundle (msg)*>
<!ATTLIST messagebundle class CDATA #IMPLIED>

<!ELEMENT msg (#PCDATA|ph|source)*>
<!ATTLIST msg id CDATA #IMPLIED>
<!ATTLIST msg seq CDATA #IMPLIED>
<!ATTLIST msg name CDATA #IMPLIED>
<!ATTLIST msg desc CDATA #IMPLIED>
<!ATTLIST msg meaning CDATA #IMPLIED>
<!ATTLIST msg obsolete (obsolete) #IMPLIED>
<!ATTLIST msg xml:space (default|preserve) "default">
<!ATTLIST msg is_hidden CDATA #IMPLIED>

<!ELEMENT source (#PCDATA)>

<!ELEMENT ph (#PCDATA|ex)*>
<!ATTLIST ph name CDATA #REQUIRED>

<!ELEMENT ex (#PCDATA)>
]>
<messagebundle>
"""
  footer = '</messagebundle>'

  file.write(header)
  for item in messages:
    _WriteMessage(file, item)
  file.write(footer)
| 124 |
| 125 |
class OutputXmb(interface.Tool):
  """Outputs all translateable messages in the .grd input file to an
.xmb file, which is the format used to give source messages to
Google's internal Translation Console tool.  The format could easily
be used for other systems.

Usage: grit xmb [-i|-h] [-l LIMITFILE] OUTPUTPATH

OUTPUTPATH is the path you want to output the .xmb file to.

The -l option can be used to output only some of the resources to the .xmb file.
LIMITFILE is the path to a file that is used to limit the items output to the
xmb file.  If the filename extension is .grd, the file must be a .grd file
and the tool only outputs the contents of nodes from the input file that also
exist in the limit file (as compared on the 'name' attribute). Otherwise it must
contain a list of the IDs that output should be limited to, one ID per line, and
the tool will only output nodes with 'name' attributes that match one of the
IDs.

The -i option causes 'grit xmb' to output an "IDs only" file instead of an XMB
file.  The "IDs only" file contains the message ID of each message that would
normally be output to the XMB file, one message ID per line.  It is designed for
use with the 'grit transl2tc' tool's -l option.

Other options:

  -D NAME[=VAL]     Specify a C-preprocessor-like define NAME with optional
                    value VAL (defaults to 1) which will be used to control
                    conditional inclusion of resources.

  -E NAME=VALUE     Set environment variable NAME to VALUE (within grit).

"""
  # The different output formats supported by this tool
  FORMAT_XMB = 0
  FORMAT_IDS_ONLY = 1

  def __init__(self, defines=None):
    """Creates the tool with the XMB output format selected.

    Args:
      defines: optional dict of defines (name -> value); entries given with
               -D on the command line are added to it by Run()
    """
    super(OutputXmb, self).__init__()
    self.format = self.FORMAT_XMB
    self.defines = defines or {}

  def ShortDescription(self):
    """Returns a one-line description of this tool."""
    return 'Exports all translateable messages into an XMB file.'

  def Run(self, opts, args):
    """Parses the tool's own options and writes the output file.

    Args:
      opts: global options object; its 'input' and 'extra_verbose'
            attributes are read here
      args: list of command-line arguments for this tool

    Returns:
      2 if the number of positional arguments is not exactly one, otherwise
      None.
    """
    self.SetOptions(opts)

    limit_file = None
    limit_is_grd = False
    limit_file_dir = None
    # 'E:' must be listed here, otherwise getopt rejects the documented -E
    # option before the handler below can ever see it.
    own_opts, args = getopt.getopt(args, 'l:D:E:ih')
    try:
      for key, val in own_opts:
        if key == '-l':
          if limit_file:
            # A repeated -l replaces the earlier one; don't leak its handle.
            limit_file.close()
          limit_file = open(val, 'r')
          limit_file_dir = util.dirname(val)
          if not limit_file_dir:
            limit_file_dir = '.'
          limit_is_grd = os.path.splitext(val)[1] == '.grd'
        elif key == '-i':
          self.format = self.FORMAT_IDS_ONLY
        elif key == '-D':
          name, val = util.ParseDefine(val)
          self.defines[name] = val
        elif key == '-E':
          (env_name, env_value) = val.split('=', 1)
          os.environ[env_name] = env_value
      if len(args) != 1:
        print('grit xmb takes exactly one argument, the path to the XMB file '
              'to output.')
        return 2

      xmb_path = args[0]
      res_tree = grd_reader.Parse(opts.input, debug=opts.extra_verbose)
      res_tree.SetOutputLanguage('en')
      res_tree.SetDefines(self.defines)
      res_tree.OnlyTheseTranslations([])
      res_tree.RunGatherers()

      with open(xmb_path, 'wb') as output_file:
        self.Process(
            res_tree, output_file, limit_file, limit_is_grd, limit_file_dir)
    finally:
      # Close the limit file on every exit path, including the early return
      # above and any exception raised while parsing or writing.
      if limit_file:
        limit_file.close()
    print("Wrote %s" % xmb_path)

  def Process(self, res_tree, output_file, limit_file=None, limit_is_grd=False,
              dir=None):
    """Writes a document with the contents of res_tree into output_file,
    limiting output to the IDs specified in limit_file, which is a GRD file if
    limit_is_grd is true, otherwise a file with one ID per line.

    The format of the output document depends on this object's format attribute.
    It can be FORMAT_XMB or FORMAT_IDS_ONLY.

    The FORMAT_IDS_ONLY format causes this function to write just a list
    of the IDs of all messages that would have been added to the XMB file, one
    ID per line.

    The FORMAT_XMB format causes this function to output the (default) XMB
    format.

    Messages are written sorted by message ID, and each message ID is written
    at most once.

    Args:
      res_tree: base.Node()
      output_file: file open for writing
      limit_file: None or file open for reading
      limit_is_grd: True | False
      dir: Directory of the limit file
    """
    # Names that nodes must have to be included; None means "no limit".
    limit_names = None
    if limit_file:
      if limit_is_grd:
        limit_tree = grd_reader.Parse(limit_file,
                                      dir=dir,
                                      debug=self.o.extra_verbose)
        limit_names = set(node.attrs['name'] for node in limit_tree
                          if 'name' in node.attrs)
      else:
        # Not a GRD file, so it's just a file with one ID per line
        limit_names = set(
            item.strip() for item in limit_file.read().split('\n'))

    ids_already_done = set()
    messages = []
    for node in res_tree:
      if (limit_names is not None and
          not ('name' in node.attrs and node.attrs['name'] in limit_names)):
        continue
      if not node.IsTranslateable():
        continue

      for clique in node.GetCliques():
        if not clique.IsTranslateable():
          continue
        if not clique.GetMessage().GetRealContent():
          continue

        # Some explanation is in order here.  Note that we can have
        # many messages with the same ID.
        #
        # The way we work around this is to maintain a list of cliques
        # per message ID (in the UberClique) and select the "best" one
        # (the first one that has a description, or an arbitrary one
        # if there is no description) for inclusion in the XMB file.
        # The translations are all going to be the same for messages
        # with the same ID, although the way we replace placeholders
        # might be slightly different.
        message_id = clique.GetMessage().GetId()
        if message_id in ids_already_done:
          continue
        ids_already_done.add(message_id)

        messages.append(node.UberClique().BestClique(message_id).GetMessage())

    # Ensure a stable order of messages, to help regression testing.
    messages.sort(key=lambda x: x.GetId())

    if self.format == self.FORMAT_IDS_ONLY:
      # We just print the list of IDs to the output file.
      for msg in messages:
        output_file.write(msg.GetId())
        output_file.write('\n')
    else:
      assert self.format == self.FORMAT_XMB
      WriteXmbFile(output_file, messages)
| OLD | NEW |