Index: grit/pseudo.py |
=================================================================== |
--- grit/pseudo.py (revision 0) |
+++ grit/pseudo.py (revision 0) |
@@ -0,0 +1,130 @@ |
+#!/usr/bin/python2.4 |
+# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+'''Pseudotranslation support. Our pseudotranslations are based on the |
+P-language, which is a simple vowel-extending language. Examples of P: |
+ - "hello" becomes "hepellopo" |
+ - "howdie" becomes "hopowdiepie" |
+ - "because" becomes "bepecaupause" (but in our implementation we don't |
+ handle the silent e at the end, so it would actually return "bepecaupausepe") |
+ |
+The P-language has the excellent quality of increasing the length of text |
+by around 30-50% which is great for pseudotranslations, to stress test any |
+GUI layouts etc. |
+ |
+To make the pseudotranslations more obviously "not a translation" and to make |
+them exercise any code that deals with encodings, we also transform all English |
+vowels into equivalent vowels with diacritics on them (rings, acutes, |
+diaereses, and circumflexes), and we write the "p" in the P-language as a Hebrew |
+character Qof. It looks sort of like a latin character "p" but it is outside |
+the latin-1 character set which will stress character encoding bugs. |
+''' |
+ |
+import re |
+import types |
+ |
+from grit import tclib |
+ |
+ |
+# An RFC language code for the P pseudolanguage. |
+PSEUDO_LANG = 'x-P-pseudo' |
+ |
+# Hebrew character Qof. It looks kind of like a 'p' but is outside |
+# the latin-1 character set which is good for our purposes. |
+# TODO(joi) For now using P instead of Qof, because of some bugs it caused. |
+# Find a better solution, i.e. one that introduces a non-latin1 character into |
+# the pseudotranslation. |
+#_QOF = u'\u05e7' |
+_QOF = u'P' |
+ |
+# How we map each vowel. |
+_VOWELS = { |
+ u'a' : u'\u00e5', # a with ring |
+ u'e' : u'\u00e9', # e acute |
+ u'i' : u'\u00ef', # i diaeresis |
+ u'o' : u'\u00f4', # o circumflex |
+ u'u' : u'\u00fc', # u diaeresis |
+ u'y' : u'\u00fd', # y acute |
+ u'A' : u'\u00c5', # A with ring |
+ u'E' : u'\u00c9', # E acute |
+ u'I' : u'\u00cf', # I diaeresis |
+ u'O' : u'\u00d4', # O circumflex |
+ u'U' : u'\u00dc', # U diaeresis |
+ u'Y' : u'\u00dd', # Y acute |
+} |
+ |
+# Matches any _VOWELS key or a capital 'P'; relies on keys() returning a list. |
+_PSUB_RE = re.compile("(%s)" % '|'.join(_VOWELS.keys() + ['P'])) |
+ |
+ |
+# Cache of pseudotranslations already created, keyed by the source string. |
+# This is important for performance reasons, especially since we routinely |
+# pseudotranslate the whole project several or many different times per build. |
+_existing_translations = {} |
+ |
+ |
+def MapVowels(str, also_p = False): |
+  '''Returns a copy of 'str' where characters that exist as keys in _VOWELS |
+  have been replaced with the corresponding value. If also_p is true, this |
+  function will also change capital P characters into _QOF (intended to be |
+  the Hebrew character Qof); otherwise capital P is left unchanged. |
+  ''' |
+  def Repl(match): |
+    # _PSUB_RE also matches a capital 'P', which is not a key of _VOWELS. |
+    if match.group() != 'P': |
+      return _VOWELS[match.group()] |
+    if also_p: |
+      return _QOF |
+    return 'P' |
+  return _PSUB_RE.sub(Repl, str) |
+ |
+ |
+def PseudoString(str): |
+  '''Returns a pseudotranslation of 'str' in our enhanced P-language: each run |
+  of consecutive vowels is mapped through MapVowels and doubled around _QOF. |
+  Results are memoized in _existing_translations, keyed by 'str'. |
+  ''' |
+  if str in _existing_translations: |
+    return _existing_translations[str] |
+ |
+  pieces = [] |
+  ix = 0 |
+  while ix < len(str): |
+    start = ix |
+    # We want to treat consecutive vowels as one composite vowel. This is not |
+    # always accurate, e.g. in compound words, but it is good enough. |
+    while ix < len(str) and str[ix] in _VOWELS: |
+      ix += 1 |
+    if ix == start: |
+      # Not a vowel: copy the character through unchanged. |
+      pieces.append(str[ix]) |
+      ix += 1 |
+    else: |
+      changed_vowels = MapVowels(str[start:ix]) |
+      pieces.extend([changed_vowels, _QOF, changed_vowels]) |
+  outstr = u''.join(pieces) |
+  _existing_translations[str] = outstr |
+  return outstr |
+ |
+ |
+def PseudoMessage(message): |
+  '''Builds the pseudotranslation of a message. |
+ |
+  Args: |
+    message: tclib.Message() |
+ |
+  Return: |
+    tclib.Translation() to which each placeholder part is appended as-is and |
+    every other part is appended as text after going through PseudoString |
+  ''' |
+  pseudo = tclib.Translation() |
+  for piece in message.GetContent(): |
+    # Only the text is pseudotranslated; placeholders are carried over as-is. |
+    if not isinstance(piece, tclib.Placeholder): |
+      pseudo.AppendText(PseudoString(piece)) |
+    else: |
+      pseudo.AppendPlaceholder(piece) |
+  return pseudo |
+ |
Property changes on: grit/pseudo.py |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |