Index: grit/tool/transl2tc.py |
=================================================================== |
--- grit/tool/transl2tc.py (revision 0) |
+++ grit/tool/transl2tc.py (revision 0) |
@@ -0,0 +1,254 @@ |
+#!/usr/bin/python2.4 |
+# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+'''The 'grit transl2tc' tool. |
+''' |
+ |
+ |
+import getopt |
+ |
+from grit.tool import interface |
+from grit.tool import rc2grd |
+from grit import grd_reader |
+from grit import util |
+ |
+from grit.extern import tclib |
+ |
+ |
+class TranslationToTc(interface.Tool): |
+ '''A tool for importing existing translations in RC format into the |
+Translation Console. |
+ |
+Usage: |
+ |
+grit -i GRD transl2tc [-l LIMITS] [RCOPTS] SOURCE_RC TRANSLATED_RC OUT_FILE |
+ |
+The tool needs a "source" RC file, i.e. in English, and an RC file that is a |
+translation of precisely the source RC file (not of an older or newer version). |
+ |
+The tool also requires you to provide a .grd file (input file) e.g. using the |
+-i global option or the GRIT_INPUT environment variable. The tool uses |
+information from your .grd file to correct placeholder names in the |
+translations and ensure that only translatable items and translations still |
+being used are output. |
+ |
+This tool will accept all the same RCOPTS as the 'grit rc2grd' tool. To get |
+a list of these options, run 'grit help rc2grd'. |
+ |
+Additionally, you can use the -l option (which must be the first option to the |
+tool) to specify a file containing a list of message IDs to which output should |
+be limited. This is only useful if you are limiting the output to your XMB |
+files using the 'grit xmb' tool's -l option. See 'grit help xmb' for how to |
+generate a file containing a list of the message IDs in an XMB file. |
+ |
+The tool will scan through both of the RC files as well as any HTML files they |
+refer to, and match together the source messages and translated messages. It |
+will output a file (OUT_FILE) you can import directly into the TC using the |
+Bulk Translation Upload tool. |
+''' |
+ |
+ def ShortDescription(self): |
+ return 'Import existing translations in RC format into the TC' |
+ |
+ def Setup(self, globopt, args): |
+ '''Sets the instance up for use. |
+ ''' |
+ self.SetOptions(globopt) |
+ self.rc2grd = rc2grd.Rc2Grd() |
+ self.rc2grd.SetOptions(globopt) |
+ self.limits = None |
+ if len(args) and args[0] == '-l': |
+ limit_file = file(args[1]) |
+ self.limits = limit_file.read().split('\n') |
+ limit_file.close() |
+ args = args[2:] |
+ return self.rc2grd.ParseOptions(args) |
+ |
+ def Run(self, globopt, args): |
+ args = self.Setup(globopt, args) |
+ |
+ if len(args) != 3: |
+ self.Out('This tool takes exactly three arguments:\n' |
+ ' 1. The path to the original RC file\n' |
+ ' 2. The path to the translated RC file\n' |
+ ' 3. The output file path.\n') |
+ return 2 |
+ |
+ grd = grd_reader.Parse(self.o.input, debug=self.o.extra_verbose) |
+ grd.RunGatherers(recursive = True) |
+ |
+ source_rc = util.WrapInputStream(file(args[0], 'r'), self.rc2grd.input_encoding) |
+ transl_rc = util.WrapInputStream(file(args[1], 'r'), self.rc2grd.input_encoding) |
+ translations = self.ExtractTranslations(grd, |
+ source_rc.read(), args[0], |
+ transl_rc.read(), args[1]) |
+ transl_rc.close() |
+ source_rc.close() |
+ |
+ output_file = util.WrapOutputStream(file(args[2], 'w')) |
+ self.WriteTranslations(output_file, translations.items()) |
+ output_file.close() |
+ |
+ self.Out('Wrote output file %s' % args[2]) |
+ |
+ def ExtractTranslations(self, current_grd, source_rc, source_path, transl_rc, transl_path): |
+ '''Extracts translations from the translated RC file, matching them with |
+ translations in the source RC file to calculate their ID, and correcting |
+ placeholders, limiting output to translateables, etc. using the supplied |
+ .grd file which is the current .grd file for your project. |
+ |
+ If this object's 'limits' attribute is not None but a list, the output of |
+ this function will be further limited to include only messages that have |
+ message IDs in the 'limits' list. |
+ |
+ Args: |
+ current_grd: grit.node.base.Node child, that has had RunGatherers(True) run on it |
+ source_rc: Complete text of source RC file |
+ source_path: Path to the source RC file |
+ transl_rc: Complete text of translated RC file |
+ transl_path: Path to the translated RC file |
+ |
+ Return: |
+ { id1 : text1, '12345678' : 'Hello USERNAME, howzit?' } |
+ ''' |
+ source_grd = self.rc2grd.Process(source_rc, source_path) |
+ self.VerboseOut('Read %s into GRIT format, running gatherers.\n' % source_path) |
+ source_grd.RunGatherers(recursive=True, debug=self.o.extra_verbose) |
+ transl_grd = self.rc2grd.Process(transl_rc, transl_path) |
+ self.VerboseOut('Read %s into GRIT format, running gatherers.\n' % transl_path) |
+ transl_grd.RunGatherers(recursive=True, debug=self.o.extra_verbose) |
+ self.VerboseOut('Done running gatherers for %s.\n' % transl_path) |
+ |
+ # Proceed to create a map from ID to translation, getting the ID from the |
+ # source GRD and the translation from the translated GRD. |
+ id2transl = {} |
+ for source_node in source_grd: |
+ source_cliques = source_node.GetCliques() |
+ if not len(source_cliques): |
+ continue |
+ |
+ assert 'name' in source_node.attrs, 'All nodes with cliques should have an ID' |
+ node_id = source_node.attrs['name'] |
+ self.ExtraVerboseOut('Processing node %s\n' % node_id) |
+ transl_node = transl_grd.GetNodeById(node_id) |
+ |
+ if transl_node: |
+ transl_cliques = transl_node.GetCliques() |
+ if not len(transl_cliques) == len(source_cliques): |
+ self.Out( |
+ 'Warning: Translation for %s has wrong # of cliques, skipping.\n' % |
+ node_id) |
+ continue |
+ else: |
+ self.Out('Warning: No translation for %s, skipping.\n' % node_id) |
+ continue |
+ |
+ if source_node.name == 'message': |
+ # Fixup placeholders as well as possible based on information from |
+ # the current .grd file if they are 'TODO_XXXX' placeholders. We need |
+ # to fixup placeholders in the translated message so that it looks right |
+ # and we also need to fixup placeholders in the source message so that |
+ # its calculated ID will match the current message. |
+ current_node = current_grd.GetNodeById(node_id) |
+ if current_node: |
+ assert len(source_cliques) == 1 and len(current_node.GetCliques()) == 1 |
+ |
+ source_msg = source_cliques[0].GetMessage() |
+ current_msg = current_node.GetCliques()[0].GetMessage() |
+ |
+ # Only do this for messages whose source version has not changed. |
+ if (source_msg.GetRealContent() != current_msg.GetRealContent()): |
+ self.VerboseOut('Info: Message %s has changed; skipping\n' % node_id) |
+ else: |
+ transl_msg = transl_cliques[0].GetMessage() |
+ transl_content = transl_msg.GetContent() |
+ current_content = current_msg.GetContent() |
+ source_content = source_msg.GetContent() |
+ |
+ ok_to_fixup = True |
+ if (len(transl_content) != len(current_content)): |
+ # message structure of translation is different, don't try fixup |
+ ok_to_fixup = False |
+ if ok_to_fixup: |
+ for ix in range(len(transl_content)): |
+ if isinstance(transl_content[ix], tclib.Placeholder): |
+ if not isinstance(current_content[ix], tclib.Placeholder): |
+ ok_to_fixup = False # structure has changed |
+ break |
+ if (transl_content[ix].GetOriginal() != |
+ current_content[ix].GetOriginal()): |
+ ok_to_fixup = False # placeholders have likely been reordered |
+ break |
+ else: # translated part is not a placeholder but a string |
+ if isinstance(current_content[ix], tclib.Placeholder): |
+ ok_to_fixup = False # placeholders have likely been reordered |
+ break |
+ |
+ if not ok_to_fixup: |
+ self.VerboseOut( |
+ 'Info: Structure of message %s has changed; skipping.\n' % node_id) |
+ else: |
+ def Fixup(content, ix): |
+ if (isinstance(content[ix], tclib.Placeholder) and |
+ content[ix].GetPresentation().startswith('TODO_')): |
+ assert isinstance(current_content[ix], tclib.Placeholder) |
+ # Get the placeholder ID and example from the current message |
+ content[ix] = current_content[ix] |
+ for ix in range(len(transl_content)): |
+ Fixup(transl_content, ix) |
+ Fixup(source_content, ix) |
+ |
+ # Only put each translation once into the map. Warn if translations |
+ # for the same message are different. |
+ for ix in range(len(transl_cliques)): |
+ source_msg = source_cliques[ix].GetMessage() |
+ source_msg.GenerateId() # needed to refresh ID based on new placeholders |
+ message_id = source_msg.GetId() |
+ translated_content = transl_cliques[ix].GetMessage().GetPresentableContent() |
+ |
+ if message_id in id2transl: |
+ existing_translation = id2transl[message_id] |
+ if existing_translation != translated_content: |
+ original_text = source_cliques[ix].GetMessage().GetPresentableContent() |
+ self.Out('Warning: Two different translations for "%s":\n' |
+ ' Translation 1: "%s"\n' |
+ ' Translation 2: "%s"\n' % |
+ (original_text, existing_translation, translated_content)) |
+ else: |
+ id2transl[message_id] = translated_content |
+ |
+ # Remove translations for messages that do not occur in the current .grd |
+ # or have been marked as not translateable, or do not occur in the 'limits' |
+ # list (if it has been set). |
+ current_message_ids = current_grd.UberClique().AllMessageIds() |
+ for message_id in id2transl.keys(): |
+ if (message_id not in current_message_ids or |
+ not current_grd.UberClique().BestClique(message_id).IsTranslateable() or |
+ (self.limits and message_id not in self.limits)): |
+ del id2transl[message_id] |
+ |
+ return id2transl |
+ |
+ # static method |
+ def WriteTranslations(output_file, translations): |
+ '''Writes the provided list of translations to the provided output file |
+ in the format used by the TC's Bulk Translation Upload tool. The file |
+ must be UTF-8 encoded. |
+ |
+ Args: |
+ output_file: util.WrapOutputStream(file('bingo.out', 'w')) |
+ translations: [ [id1, text1], ['12345678', 'Hello USERNAME, howzit?'] ] |
+ |
+ Return: |
+ None |
+ ''' |
+ for id, text in translations: |
+ text = text.replace('<', '<').replace('>', '>') |
+ output_file.write(id) |
+ output_file.write(' ') |
+ output_file.write(text) |
+ output_file.write('\n') |
+ WriteTranslations = staticmethod(WriteTranslations) |
+ |
Property changes on: grit/tool/transl2tc.py |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |