Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1159)

Unified Diff: grit/tool/transl2tc.py

Issue 7994004: Initial source commit to grit-i18n project. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/
Patch Set: Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « grit/tool/toolbar_preprocess.py ('k') | grit/tool/transl2tc_unittest.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: grit/tool/transl2tc.py
===================================================================
--- grit/tool/transl2tc.py (revision 0)
+++ grit/tool/transl2tc.py (revision 0)
@@ -0,0 +1,254 @@
+#!/usr/bin/python2.4
+# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+'''The 'grit transl2tc' tool.
+'''
+
+
+import getopt
+
+from grit.tool import interface
+from grit.tool import rc2grd
+from grit import grd_reader
+from grit import util
+
+from grit.extern import tclib
+
+
+class TranslationToTc(interface.Tool):
+ '''A tool for importing existing translations in RC format into the
+Translation Console.
+
+Usage:
+
+grit -i GRD transl2tc [-l LIMITS] [RCOPTS] SOURCE_RC TRANSLATED_RC OUT_FILE
+
+The tool needs a "source" RC file, i.e. in English, and an RC file that is a
+translation of precisely the source RC file (not of an older or newer version).
+
+The tool also requires you to provide a .grd file (input file) e.g. using the
+-i global option or the GRIT_INPUT environment variable. The tool uses
+information from your .grd file to correct placeholder names in the
+translations and ensure that only translatable items and translations still
+being used are output.
+
+This tool will accept all the same RCOPTS as the 'grit rc2grd' tool. To get
+a list of these options, run 'grit help rc2grd'.
+
+Additionally, you can use the -l option (which must be the first option to the
+tool) to specify a file containing a list of message IDs to which output should
+be limited. This is only useful if you are limiting the output to your XMB
+files using the 'grit xmb' tool's -l option. See 'grit help xmb' for how to
+generate a file containing a list of the message IDs in an XMB file.
+
+The tool will scan through both of the RC files as well as any HTML files they
+refer to, and match together the source messages and translated messages. It
+will output a file (OUTPUT_FILE) you can import directly into the TC using the
+Bulk Translation Upload tool.
+'''
+
+ def ShortDescription(self):
+ return 'Import existing translations in RC format into the TC'
+
+ def Setup(self, globopt, args):
+ '''Sets the instance up for use.
+ '''
+ self.SetOptions(globopt)
+ self.rc2grd = rc2grd.Rc2Grd()
+ self.rc2grd.SetOptions(globopt)
+ self.limits = None
+ if len(args) and args[0] == '-l':
+ limit_file = file(args[1])
+ self.limits = limit_file.read().split('\n')
+ limit_file.close()
+ args = args[2:]
+ return self.rc2grd.ParseOptions(args)
+
+ def Run(self, globopt, args):
+ args = self.Setup(globopt, args)
+
+ if len(args) != 3:
+ self.Out('This tool takes exactly three arguments:\n'
+ ' 1. The path to the original RC file\n'
+ ' 2. The path to the translated RC file\n'
+ ' 3. The output file path.\n')
+ return 2
+
+ grd = grd_reader.Parse(self.o.input, debug=self.o.extra_verbose)
+ grd.RunGatherers(recursive = True)
+
+ source_rc = util.WrapInputStream(file(args[0], 'r'), self.rc2grd.input_encoding)
+ transl_rc = util.WrapInputStream(file(args[1], 'r'), self.rc2grd.input_encoding)
+ translations = self.ExtractTranslations(grd,
+ source_rc.read(), args[0],
+ transl_rc.read(), args[1])
+ transl_rc.close()
+ source_rc.close()
+
+ output_file = util.WrapOutputStream(file(args[2], 'w'))
+ self.WriteTranslations(output_file, translations.items())
+ output_file.close()
+
+ self.Out('Wrote output file %s' % args[2])
+
+ def ExtractTranslations(self, current_grd, source_rc, source_path, transl_rc, transl_path):
+ '''Extracts translations from the translated RC file, matching them with
+ translations in the source RC file to calculate their ID, and correcting
+ placeholders, limiting output to translateables, etc. using the supplied
+ .grd file which is the current .grd file for your project.
+
+ If this object's 'limits' attribute is not None but a list, the output of
+ this function will be further limited to include only messages that have
+ message IDs in the 'limits' list.
+
+ Args:
+ current_grd: grit.node.base.Node child, that has had RunGatherers(True) run on it
+ source_rc: Complete text of source RC file
+ source_path: Path to the source RC file
+ transl_rc: Complete text of translated RC file
+ transl_path: Path to the translated RC file
+
+ Return:
+ { id1 : text1, '12345678' : 'Hello USERNAME, howzit?' }
+ '''
+ source_grd = self.rc2grd.Process(source_rc, source_path)
+ self.VerboseOut('Read %s into GRIT format, running gatherers.\n' % source_path)
+ source_grd.RunGatherers(recursive=True, debug=self.o.extra_verbose)
+ transl_grd = self.rc2grd.Process(transl_rc, transl_path)
+ self.VerboseOut('Read %s into GRIT format, running gatherers.\n' % transl_path)
+ transl_grd.RunGatherers(recursive=True, debug=self.o.extra_verbose)
+ self.VerboseOut('Done running gatherers for %s.\n' % transl_path)
+
+ # Proceed to create a map from ID to translation, getting the ID from the
+ # source GRD and the translation from the translated GRD.
+ id2transl = {}
+ for source_node in source_grd:
+ source_cliques = source_node.GetCliques()
+ if not len(source_cliques):
+ continue
+
+ assert 'name' in source_node.attrs, 'All nodes with cliques should have an ID'
+ node_id = source_node.attrs['name']
+ self.ExtraVerboseOut('Processing node %s\n' % node_id)
+ transl_node = transl_grd.GetNodeById(node_id)
+
+ if transl_node:
+ transl_cliques = transl_node.GetCliques()
+ if not len(transl_cliques) == len(source_cliques):
+ self.Out(
+ 'Warning: Translation for %s has wrong # of cliques, skipping.\n' %
+ node_id)
+ continue
+ else:
+ self.Out('Warning: No translation for %s, skipping.\n' % node_id)
+ continue
+
+ if source_node.name == 'message':
+ # Fixup placeholders as well as possible based on information from
+ # the current .grd file if they are 'TODO_XXXX' placeholders. We need
+ # to fixup placeholders in the translated message so that it looks right
+ # and we also need to fixup placeholders in the source message so that
+ # its calculated ID will match the current message.
+ current_node = current_grd.GetNodeById(node_id)
+ if current_node:
+ assert len(source_cliques) == 1 and len(current_node.GetCliques()) == 1
+
+ source_msg = source_cliques[0].GetMessage()
+ current_msg = current_node.GetCliques()[0].GetMessage()
+
+ # Only do this for messages whose source version has not changed.
+ if (source_msg.GetRealContent() != current_msg.GetRealContent()):
+ self.VerboseOut('Info: Message %s has changed; skipping\n' % node_id)
+ else:
+ transl_msg = transl_cliques[0].GetMessage()
+ transl_content = transl_msg.GetContent()
+ current_content = current_msg.GetContent()
+ source_content = source_msg.GetContent()
+
+ ok_to_fixup = True
+ if (len(transl_content) != len(current_content)):
+ # message structure of translation is different, don't try fixup
+ ok_to_fixup = False
+ if ok_to_fixup:
+ for ix in range(len(transl_content)):
+ if isinstance(transl_content[ix], tclib.Placeholder):
+ if not isinstance(current_content[ix], tclib.Placeholder):
+ ok_to_fixup = False # structure has changed
+ break
+ if (transl_content[ix].GetOriginal() !=
+ current_content[ix].GetOriginal()):
+ ok_to_fixup = False # placeholders have likely been reordered
+ break
+ else: # translated part is not a placeholder but a string
+ if isinstance(current_content[ix], tclib.Placeholder):
+ ok_to_fixup = False # placeholders have likely been reordered
+ break
+
+ if not ok_to_fixup:
+ self.VerboseOut(
+ 'Info: Structure of message %s has changed; skipping.\n' % node_id)
+ else:
+ def Fixup(content, ix):
+ if (isinstance(content[ix], tclib.Placeholder) and
+ content[ix].GetPresentation().startswith('TODO_')):
+ assert isinstance(current_content[ix], tclib.Placeholder)
+ # Get the placeholder ID and example from the current message
+ content[ix] = current_content[ix]
+ for ix in range(len(transl_content)):
+ Fixup(transl_content, ix)
+ Fixup(source_content, ix)
+
+ # Only put each translation once into the map. Warn if translations
+ # for the same message are different.
+ for ix in range(len(transl_cliques)):
+ source_msg = source_cliques[ix].GetMessage()
+ source_msg.GenerateId() # needed to refresh ID based on new placeholders
+ message_id = source_msg.GetId()
+ translated_content = transl_cliques[ix].GetMessage().GetPresentableContent()
+
+ if message_id in id2transl:
+ existing_translation = id2transl[message_id]
+ if existing_translation != translated_content:
+ original_text = source_cliques[ix].GetMessage().GetPresentableContent()
+ self.Out('Warning: Two different translations for "%s":\n'
+ ' Translation 1: "%s"\n'
+ ' Translation 2: "%s"\n' %
+ (original_text, existing_translation, translated_content))
+ else:
+ id2transl[message_id] = translated_content
+
+ # Remove translations for messages that do not occur in the current .grd
+ # or have been marked as not translateable, or do not occur in the 'limits'
+ # list (if it has been set).
+ current_message_ids = current_grd.UberClique().AllMessageIds()
+ for message_id in id2transl.keys():
+ if (message_id not in current_message_ids or
+ not current_grd.UberClique().BestClique(message_id).IsTranslateable() or
+ (self.limits and message_id not in self.limits)):
+ del id2transl[message_id]
+
+ return id2transl
+
+ # static method
+ def WriteTranslations(output_file, translations):
+ '''Writes the provided list of translations to the provided output file
+ in the format used by the TC's Bulk Translation Upload tool. The file
+ must be UTF-8 encoded.
+
+ Args:
+ output_file: util.WrapOutputStream(file('bingo.out', 'w'))
+ translations: [ [id1, text1], ['12345678', 'Hello USERNAME, howzit?'] ]
+
+ Return:
+ None
+ '''
+ for id, text in translations:
+ text = text.replace('<', '&lt;').replace('>', '&gt;')
+ output_file.write(id)
+ output_file.write(' ')
+ output_file.write(text)
+ output_file.write('\n')
+ WriteTranslations = staticmethod(WriteTranslations)
+
Property changes on: grit/tool/transl2tc.py
___________________________________________________________________
Added: svn:eol-style
+ LF
« no previous file with comments | « grit/tool/toolbar_preprocess.py ('k') | grit/tool/transl2tc_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698