Index: grit/tool/xmb.py |
=================================================================== |
--- grit/tool/xmb.py (revision 202) |
+++ grit/tool/xmb.py (working copy) |
@@ -1,291 +0,0 @@ |
-#!/usr/bin/env python |
-# Copyright (c) 2012 The Chromium Authors. All rights reserved. |
-# Use of this source code is governed by a BSD-style license that can be |
-# found in the LICENSE file. |
- |
-"""The 'grit xmb' tool. |
-""" |
- |
-import getopt |
-import os |
- |
-from xml.sax import saxutils |
- |
-from grit import grd_reader |
-from grit import lazy_re |
-from grit import tclib |
-from grit import util |
-from grit.tool import interface |
- |
- |
-# Used to collapse presentable content to determine if |
-# xml:space="preserve" is needed. |
-_WHITESPACES_REGEX = lazy_re.compile(ur'\s\s*') |
- |
- |
-# See XmlEscape below. |
-_XML_QUOTE_ESCAPES = { |
- u"'": u'&apos;', |
- u'"': u'&quot;', |
-} |
-_XML_BAD_CHAR_REGEX = lazy_re.compile(u'[^\u0009\u000A\u000D' |
- u'\u0020-\uD7FF\uE000-\uFFFD]') |
- |
- |
-def _XmlEscape(s): |
- """Returns text escaped for XML in a way compatible with Google's |
- internal Translation Console tool. May be used for attributes as |
- well as for contents. |
- """ |
- if not type(s) == unicode: |
- s = unicode(s) |
- result = saxutils.escape(s, _XML_QUOTE_ESCAPES) |
- return _XML_BAD_CHAR_REGEX.sub(u'', result).encode('utf-8') |
- |
- |
-def _WriteAttribute(file, name, value): |
- """Writes an XML attribute to the specified file. |
- |
- Args: |
- file: file to write to |
- name: name of the attribute |
- value: (unescaped) value of the attribute |
- """ |
- if value: |
- file.write(' %s="%s"' % (name, _XmlEscape(value))) |
- |
- |
-def _WriteMessage(file, message): |
- presentable_content = message.GetPresentableContent() |
- assert (type(presentable_content) == unicode or |
- (len(message.parts) == 1 and |
- type(message.parts[0] == tclib.Placeholder))) |
- preserve_space = presentable_content != _WHITESPACES_REGEX.sub( |
- u' ', presentable_content.strip()) |
- |
- file.write('<msg') |
- _WriteAttribute(file, 'desc', message.GetDescription()) |
- _WriteAttribute(file, 'id', message.GetId()) |
- _WriteAttribute(file, 'meaning', message.GetMeaning()) |
- if preserve_space: |
- _WriteAttribute(file, 'xml:space', 'preserve') |
- file.write('>') |
- if not preserve_space: |
- file.write('\n ') |
- |
- parts = message.GetContent() |
- for part in parts: |
- if isinstance(part, tclib.Placeholder): |
- file.write('<ph') |
- _WriteAttribute(file, 'name', part.GetPresentation()) |
- file.write('><ex>') |
- file.write(_XmlEscape(part.GetExample())) |
- file.write('</ex>') |
- file.write(_XmlEscape(part.GetOriginal())) |
- file.write('</ph>') |
- else: |
- file.write(_XmlEscape(part)) |
- if not preserve_space: |
- file.write('\n') |
- file.write('</msg>\n') |
- |
- |
-def WriteXmbFile(file, messages): |
- """Writes the given grit.tclib.Message items to the specified open |
- file-like object in the XMB format. |
- """ |
- file.write("""<?xml version="1.0" encoding="UTF-8"?> |
-<!DOCTYPE messagebundle [ |
-<!ELEMENT messagebundle (msg)*> |
-<!ATTLIST messagebundle class CDATA #IMPLIED> |
- |
-<!ELEMENT msg (#PCDATA|ph|source)*> |
-<!ATTLIST msg id CDATA #IMPLIED> |
-<!ATTLIST msg seq CDATA #IMPLIED> |
-<!ATTLIST msg name CDATA #IMPLIED> |
-<!ATTLIST msg desc CDATA #IMPLIED> |
-<!ATTLIST msg meaning CDATA #IMPLIED> |
-<!ATTLIST msg obsolete (obsolete) #IMPLIED> |
-<!ATTLIST msg xml:space (default|preserve) "default"> |
-<!ATTLIST msg is_hidden CDATA #IMPLIED> |
- |
-<!ELEMENT source (#PCDATA)> |
- |
-<!ELEMENT ph (#PCDATA|ex)*> |
-<!ATTLIST ph name CDATA #REQUIRED> |
- |
-<!ELEMENT ex (#PCDATA)> |
-]> |
-<messagebundle> |
-""") |
- for message in messages: |
- _WriteMessage(file, message) |
- file.write('</messagebundle>') |
- |
- |
-class OutputXmb(interface.Tool): |
- """Outputs all translateable messages in the .grd input file to an |
-.xmb file, which is the format used to give source messages to |
-Google's internal Translation Console tool. The format could easily |
-be used for other systems. |
- |
-Usage: grit xmb [-i|-h] [-l LIMITFILE] OUTPUTPATH |
- |
-OUTPUTPATH is the path you want to output the .xmb file to. |
- |
-The -l option can be used to output only some of the resources to the .xmb file. |
-LIMITFILE is the path to a file that is used to limit the items output to the |
-xmb file. If the filename extension is .grd, the file must be a .grd file |
-and the tool only output the contents of nodes from the input file that also |
-exist in the limit file (as compared on the 'name' attribute). Otherwise it must |
-contain a list of the IDs that output should be limited to, one ID per line, and |
-the tool will only output nodes with 'name' attributes that match one of the |
-IDs. |
- |
-The -i option causes 'grit xmb' to output an "IDs only" file instead of an XMB |
-file. The "IDs only" file contains the message ID of each message that would |
-normally be output to the XMB file, one message ID per line. It is designed for |
-use with the 'grit transl2tc' tool's -l option. |
- |
-Other options: |
- |
- -D NAME[=VAL] Specify a C-preprocessor-like define NAME with optional |
- value VAL (defaults to 1) which will be used to control |
- conditional inclusion of resources. |
- |
- -E NAME=VALUE Set environment variable NAME to VALUE (within grit). |
- |
-""" |
- # The different output formats supported by this tool |
- FORMAT_XMB = 0 |
- FORMAT_IDS_ONLY = 1 |
- |
- def __init__(self, defines=None): |
- super(OutputXmb, self).__init__() |
- self.format = self.FORMAT_XMB |
- self.defines = defines or {} |
- |
- def ShortDescription(self): |
- return 'Exports all translateable messages into an XMB file.' |
- |
- def Run(self, opts, args): |
- self.SetOptions(opts) |
- |
- limit_file = None |
- limit_is_grd = False |
- limit_file_dir = None |
- own_opts, args = getopt.getopt(args, 'l:D:ih') |
- for key, val in own_opts: |
- if key == '-l': |
- limit_file = open(val, 'r') |
- limit_file_dir = util.dirname(val) |
- if not len(limit_file_dir): |
- limit_file_dir = '.' |
- limit_is_grd = os.path.splitext(val)[1] == '.grd' |
- elif key == '-i': |
- self.format = self.FORMAT_IDS_ONLY |
- elif key == '-D': |
- name, val = util.ParseDefine(val) |
- self.defines[name] = val |
- elif key == '-E': |
- (env_name, env_value) = val.split('=', 1) |
- os.environ[env_name] = env_value |
- if not len(args) == 1: |
- print ('grit xmb takes exactly one argument, the path to the XMB file ' |
- 'to output.') |
- return 2 |
- |
- xmb_path = args[0] |
- res_tree = grd_reader.Parse(opts.input, debug=opts.extra_verbose) |
- res_tree.SetOutputLanguage('en') |
- res_tree.SetDefines(self.defines) |
- res_tree.OnlyTheseTranslations([]) |
- res_tree.RunGatherers() |
- |
- with open(xmb_path, 'wb') as output_file: |
- self.Process( |
- res_tree, output_file, limit_file, limit_is_grd, limit_file_dir) |
- if limit_file: |
- limit_file.close() |
- print "Wrote %s" % xmb_path |
- |
- def Process(self, res_tree, output_file, limit_file=None, limit_is_grd=False, |
- dir=None): |
- """Writes a document with the contents of res_tree into output_file, |
- limiting output to the IDs specified in limit_file, which is a GRD file if |
- limit_is_grd is true, otherwise a file with one ID per line. |
- |
- The format of the output document depends on this object's format attribute. |
- It can be FORMAT_XMB or FORMAT_IDS_ONLY. |
- |
- The FORMAT_IDS_ONLY format causes this function to write just a list |
- of the IDs of all messages that would have been added to the XMB file, one |
- ID per line. |
- |
- The FORMAT_XMB format causes this function to output the (default) XMB |
- format. |
- |
- Args: |
- res_tree: base.Node() |
- output_file: file open for writing |
- limit_file: None or file open for reading |
- limit_is_grd: True | False |
- dir: Directory of the limit file |
- """ |
- if limit_file: |
- if limit_is_grd: |
- limit_list = [] |
- limit_tree = grd_reader.Parse(limit_file, |
- dir=dir, |
- debug=self.o.extra_verbose) |
- for node in limit_tree: |
- if 'name' in node.attrs: |
- limit_list.append(node.attrs['name']) |
- else: |
- # Not a GRD file, so it's just a file with one ID per line |
- limit_list = [item.strip() for item in limit_file.read().split('\n')] |
- |
- ids_already_done = {} |
- messages = [] |
- for node in res_tree: |
- if (limit_file and |
- not ('name' in node.attrs and node.attrs['name'] in limit_list)): |
- continue |
- if not node.IsTranslateable(): |
- continue |
- |
- for clique in node.GetCliques(): |
- if not clique.IsTranslateable(): |
- continue |
- if not clique.GetMessage().GetRealContent(): |
- continue |
- |
- # Some explanation is in order here. Note that we can have |
- # many messages with the same ID. |
- # |
- # The way we work around this is to maintain a list of cliques |
- # per message ID (in the UberClique) and select the "best" one |
- # (the first one that has a description, or an arbitrary one |
- # if there is no description) for inclusion in the XMB file. |
- # The translations are all going to be the same for messages |
- # with the same ID, although the way we replace placeholders |
- # might be slightly different. |
- id = clique.GetMessage().GetId() |
- if id in ids_already_done: |
- continue |
- ids_already_done[id] = 1 |
- |
- message = node.UberClique().BestClique(id).GetMessage() |
- messages += [message] |
- |
- # Ensure a stable order of messages, to help regression testing. |
- messages.sort(key=lambda x:x.GetId()) |
- |
- if self.format == self.FORMAT_IDS_ONLY: |
- # We just print the list of IDs to the output file. |
- for msg in messages: |
- output_file.write(msg.GetId()) |
- output_file.write('\n') |
- else: |
- assert self.format == self.FORMAT_XMB |
- WriteXmbFile(output_file, messages) |