| Index: tools/grit/grit/tool/xmb.py
|
| diff --git a/tools/grit/grit/tool/xmb.py b/tools/grit/grit/tool/xmb.py
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..aaefeecad4b54b554402fb21049d89f22ea30072
|
| --- /dev/null
|
| +++ b/tools/grit/grit/tool/xmb.py
|
| @@ -0,0 +1,291 @@
|
| +#!/usr/bin/env python
|
| +# Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +"""The 'grit xmb' tool.
|
| +"""
|
| +
|
| +import getopt
|
| +import os
|
| +
|
| +from xml.sax import saxutils
|
| +
|
| +from grit import grd_reader
|
| +from grit import lazy_re
|
| +from grit import tclib
|
| +from grit import util
|
| +from grit.tool import interface
|
| +
|
| +
|
# Used to collapse presentable content to determine if
# xml:space="preserve" is needed.  Matches any run of one or more
# whitespace characters.  (Spelled without the Python-2-only ur'' prefix;
# the pattern itself is unchanged.)
_WHITESPACES_REGEX = lazy_re.compile(u'\\s\\s*')


# Extra entities passed to saxutils.escape() by _XmlEscape below, so that
# both kinds of quote are escaped and the result is safe inside attribute
# values as well as element content.
_XML_QUOTE_ESCAPES = {
    u"'": u'&apos;',
    u'"': u'&quot;',
}

# Matches every character that is NOT a tab, line feed, carriage return, or
# in the ranges U+0020-U+D7FF / U+E000-U+FFFD.  _XmlEscape strips whatever
# this matches from its output.
_XML_BAD_CHAR_REGEX = lazy_re.compile(u'[^\u0009\u000A\u000D'
                                      u'\u0020-\uD7FF\uE000-\uFFFD]')
|
| +
|
| +
|
def _XmlEscape(s):
  """Returns text escaped for XML in a way compatible with Google's
  internal Translation Console tool.  May be used for attributes as
  well as for contents.

  The input is converted to unicode if it is not unicode already, escaped
  with saxutils.escape() using the additional _XML_QUOTE_ESCAPES entities,
  stripped of every character matched by _XML_BAD_CHAR_REGEX, and finally
  encoded as UTF-8.

  Args:
    s: the value to escape

  Returns:
    The escaped text, UTF-8 encoded.
  """
  text = s if type(s) == unicode else unicode(s)
  escaped = saxutils.escape(text, _XML_QUOTE_ESCAPES)
  cleaned = _XML_BAD_CHAR_REGEX.sub(u'', escaped)
  return cleaned.encode('utf-8')
|
| +
|
| +
|
def _WriteAttribute(file, name, value):
  """Writes an XML attribute to the specified file.

  Nothing at all is written when the value is empty (or otherwise falsy).
  Otherwise the output is a leading space followed by name="value", with
  the value passed through _XmlEscape.

  Args:
    file: file to write to
    name: name of the attribute
    value: (unescaped) value of the attribute
  """
  if not value:
    return
  attribute = ' %s="%s"' % (name, _XmlEscape(value))
  file.write(attribute)
|
| +
|
| +
|
def _WriteMessage(file, message):
  """Writes one message to the file as a <msg> element.

  The element carries the message's description, ID and meaning as
  attributes (each one is omitted when its value is empty).  If collapsing
  whitespace in the presentable content would change it, the element is
  marked xml:space="preserve" and its content is written verbatim;
  otherwise the content is put on a line of its own.

  Placeholders are written as <ph name="..."><ex>EXAMPLE</ex>ORIGINAL</ph>;
  every other part is written as escaped text.

  Args:
    file: file-like object to write to
    message: the message to write; must provide GetPresentableContent(),
        GetDescription(), GetId(), GetMeaning(), GetContent() and a 'parts'
        attribute
  """
  presentable_content = message.GetPresentableContent()
  # The presentable content must be unicode unless the message is made up of
  # exactly one placeholder.  (The placeholder check used to be
  # type(parts[0] == Placeholder), i.e. the type of a bool, which is always
  # true and therefore checked nothing.)
  assert (isinstance(presentable_content, unicode) or
          (len(message.parts) == 1 and
           isinstance(message.parts[0], tclib.Placeholder)))

  # Whitespace has to be preserved if stripping the ends and collapsing every
  # whitespace run to a single space would alter the content.
  collapsed_content = _WHITESPACES_REGEX.sub(u' ', presentable_content.strip())
  preserve_space = presentable_content != collapsed_content

  file.write('<msg')
  _WriteAttribute(file, 'desc', message.GetDescription())
  _WriteAttribute(file, 'id', message.GetId())
  _WriteAttribute(file, 'meaning', message.GetMeaning())
  if preserve_space:
    _WriteAttribute(file, 'xml:space', 'preserve')
  file.write('>')
  if not preserve_space:
    # Whitespace is insignificant here, so start the content on its own line.
    file.write('\n ')

  for part in message.GetContent():
    if isinstance(part, tclib.Placeholder):
      file.write('<ph')
      _WriteAttribute(file, 'name', part.GetPresentation())
      file.write('><ex>')
      file.write(_XmlEscape(part.GetExample()))
      file.write('</ex>')
      file.write(_XmlEscape(part.GetOriginal()))
      file.write('</ph>')
    else:
      file.write(_XmlEscape(part))

  if not preserve_space:
    file.write('\n')
  file.write('</msg>\n')
|
| +
|
| +
|
def WriteXmbFile(file, messages):
  """Writes the given grit.tclib.Message items to the specified open
  file-like object in the XMB format.

  The output consists of the XML declaration, an inline DTD for the
  message bundle, the opening <messagebundle> tag, one <msg> element per
  message (in the order given) and the closing </messagebundle> tag, which
  is not followed by a newline.

  Args:
    file: file-like object open for writing
    messages: iterable of the messages to write
  """
  # Each entry is one line of the header; an empty entry is a blank line.
  # The trailing empty entry makes the joined text end with a newline.
  header_lines = (
      '<?xml version="1.0" encoding="UTF-8"?>',
      '<!DOCTYPE messagebundle [',
      '<!ELEMENT messagebundle (msg)*>',
      '<!ATTLIST messagebundle class CDATA #IMPLIED>',
      '',
      '<!ELEMENT msg (#PCDATA|ph|source)*>',
      '<!ATTLIST msg id CDATA #IMPLIED>',
      '<!ATTLIST msg seq CDATA #IMPLIED>',
      '<!ATTLIST msg name CDATA #IMPLIED>',
      '<!ATTLIST msg desc CDATA #IMPLIED>',
      '<!ATTLIST msg meaning CDATA #IMPLIED>',
      '<!ATTLIST msg obsolete (obsolete) #IMPLIED>',
      '<!ATTLIST msg xml:space (default|preserve) "default">',
      '<!ATTLIST msg is_hidden CDATA #IMPLIED>',
      '',
      '<!ELEMENT source (#PCDATA)>',
      '',
      '<!ELEMENT ph (#PCDATA|ex)*>',
      '<!ATTLIST ph name CDATA #REQUIRED>',
      '',
      '<!ELEMENT ex (#PCDATA)>',
      ']>',
      '<messagebundle>',
      '',
  )
  file.write('\n'.join(header_lines))

  for current_message in messages:
    _WriteMessage(file, current_message)

  file.write('</messagebundle>')
|
| +
|
| +
|
class OutputXmb(interface.Tool):
  """Outputs all translateable messages in the .grd input file to an
.xmb file, which is the format used to give source messages to
Google's internal Translation Console tool.  The format could easily
be used for other systems.

Usage: grit xmb [-i|-h] [-l LIMITFILE] OUTPUTPATH

OUTPUTPATH is the path you want to output the .xmb file to.

The -l option can be used to output only some of the resources to the .xmb file.
LIMITFILE is the path to a file that is used to limit the items output to the
xmb file.  If the filename extension is .grd, the file must be a .grd file
and the tool only outputs the contents of nodes from the input file that also
exist in the limit file (as compared on the 'name' attribute).  Otherwise it must
contain a list of the IDs that output should be limited to, one ID per line, and
the tool will only output nodes with 'name' attributes that match one of the
IDs.

The -i option causes 'grit xmb' to output an "IDs only" file instead of an XMB
file.  The "IDs only" file contains the message ID of each message that would
normally be output to the XMB file, one message ID per line.  It is designed for
use with the 'grit transl2tc' tool's -l option.

Other options:

  -D NAME[=VAL]     Specify a C-preprocessor-like define NAME with optional
                    value VAL (defaults to 1) which will be used to control
                    conditional inclusion of resources.

  -E NAME=VALUE     Set environment variable NAME to VALUE (within grit).

"""
  # The different output formats supported by this tool
  FORMAT_XMB = 0
  FORMAT_IDS_ONLY = 1

  def __init__(self, defines=None):
    """Creates the tool.

    Args:
      defines: optional dict of defines (name -> value); more can be added
          with -D on the command line.
    """
    super(OutputXmb, self).__init__()
    self.format = self.FORMAT_XMB
    self.defines = defines or {}

  def ShortDescription(self):
    """Returns a one-line description of the tool."""
    return 'Exports all translateable messages into an XMB file.'

  def Run(self, opts, args):
    """Parses the tool's own arguments and writes the output file.

    Args:
      opts: global grit options; 'input' and 'extra_verbose' are read here
      args: the arguments following 'xmb' on the command line

    Returns:
      2 if the number of positional arguments is wrong, otherwise None.
    """
    self.SetOptions(opts)

    limit_path = None
    # 'E:' must be part of the spec, otherwise getopt rejects the documented
    # -E option before the branch below can ever see it.
    # NOTE(review): '-h' is accepted by the spec but no branch handles it.
    own_opts, args = getopt.getopt(args, 'l:D:E:ih')
    for key, val in own_opts:
      if key == '-l':
        limit_path = val
      elif key == '-i':
        self.format = self.FORMAT_IDS_ONLY
      elif key == '-D':
        define_name, define_value = util.ParseDefine(val)
        self.defines[define_name] = define_value
      elif key == '-E':
        (env_name, env_value) = val.split('=', 1)
        os.environ[env_name] = env_value
    if len(args) != 1:
      print ('grit xmb takes exactly one argument, the path to the XMB file '
             'to output.')
      return 2

    xmb_path = args[0]

    limit_file = None
    limit_is_grd = False
    limit_file_dir = None
    if limit_path is not None:
      # Opened only after the arguments have been validated, so that the
      # early return above cannot leak the handle.
      limit_file = open(limit_path, 'r')
      limit_file_dir = util.dirname(limit_path) or '.'
      limit_is_grd = os.path.splitext(limit_path)[1] == '.grd'

    try:
      res_tree = grd_reader.Parse(opts.input, debug=opts.extra_verbose)
      res_tree.SetOutputLanguage('en')
      res_tree.SetDefines(self.defines)
      res_tree.OnlyTheseTranslations([])
      res_tree.RunGatherers()

      with open(xmb_path, 'wb') as output_file:
        self.Process(
            res_tree, output_file, limit_file, limit_is_grd, limit_file_dir)
    finally:
      # Close the limit file even when parsing or processing fails.
      if limit_file is not None:
        limit_file.close()
    print('Wrote %s' % xmb_path)

  def Process(self, res_tree, output_file, limit_file=None, limit_is_grd=False,
              dir=None):
    """Writes a document with the contents of res_tree into output_file,
    limiting output to the IDs specified in limit_file, which is a GRD file if
    limit_is_grd is true, otherwise a file with one ID per line.

    The format of the output document depends on this object's format attribute.
    It can be FORMAT_XMB or FORMAT_IDS_ONLY.

    The FORMAT_IDS_ONLY format causes this function to write just a list
    of the IDs of all messages that would have been added to the XMB file, one
    ID per line.

    The FORMAT_XMB format causes this function to output the (default) XMB
    format.

    Args:
      res_tree: base.Node()
      output_file: file open for writing
      limit_file: None or file open for reading
      limit_is_grd: True | False
      dir: Directory of the limit file
    """
    # Names that output is limited to, or None when there is no limit file.
    # A set keeps the per-node membership test below cheap.
    limit_names = None
    if limit_file:
      if limit_is_grd:
        limit_tree = grd_reader.Parse(limit_file,
                                      dir=dir,
                                      debug=self.o.extra_verbose)
        limit_names = set(node.attrs['name'] for node in limit_tree
                          if 'name' in node.attrs)
      else:
        # Not a GRD file, so it's just a file with one ID per line
        limit_names = set(
            item.strip() for item in limit_file.read().split('\n'))

    ids_already_done = set()
    messages = []
    for node in res_tree:
      if (limit_names is not None and
          not ('name' in node.attrs and node.attrs['name'] in limit_names)):
        continue
      if not node.IsTranslateable():
        continue

      for clique in node.GetCliques():
        if not clique.IsTranslateable():
          continue
        if not clique.GetMessage().GetRealContent():
          continue

        # Some explanation is in order here.  Note that we can have
        # many messages with the same ID.
        #
        # The way we work around this is to maintain a list of cliques
        # per message ID (in the UberClique) and select the "best" one
        # (the first one that has a description, or an arbitrary one
        # if there is no description) for inclusion in the XMB file.
        # The translations are all going to be the same for messages
        # with the same ID, although the way we replace placeholders
        # might be slightly different.
        message_id = clique.GetMessage().GetId()
        if message_id in ids_already_done:
          continue
        ids_already_done.add(message_id)

        messages.append(node.UberClique().BestClique(message_id).GetMessage())

    # Ensure a stable order of messages, to help regression testing.
    messages.sort(key=lambda x: x.GetId())

    if self.format == self.FORMAT_IDS_ONLY:
      # We just print the list of IDs to the output file.
      for msg in messages:
        output_file.write(msg.GetId())
        output_file.write('\n')
    else:
      assert self.format == self.FORMAT_XMB
      WriteXmbFile(output_file, messages)
|
|
|