| Index: grit/pseudo.py
|
| ===================================================================
|
| --- grit/pseudo.py (revision 0)
|
| +++ grit/pseudo.py (revision 0)
|
| @@ -0,0 +1,130 @@
|
| +#!/usr/bin/python2.4
|
| +# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +'''Pseudotranslation support. Our pseudotranslations are based on the
|
| +P-language, which is a simple vowel-extending language. Examples of P:
|
| + - "hello" becomes "hepellopo"
|
| + - "howdie" becomes "hopowdiepie"
|
| + - "because" becomes "bepecaupause" (but in our implementation we don't
|
| + handle the silent e at the end so it actually would return "bepecaupausepe"
|
| +
|
| +The P-language has the excellent quality of increasing the length of text
|
| +by around 30-50% which is great for pseudotranslations, to stress test any
|
| +GUI layouts etc.
|
| +
|
| +To make the pseudotranslations more obviously "not a translation" and to make
|
| +them exercise any code that deals with encodings, we also transform all English
|
| +vowels into equivalent vowels with diacriticals on them (rings, acutes,
|
| +diaresis, and circumflex), and we write the "p" in the P-language as a Hebrew
|
| +character Qof. It looks sort of like a latin character "p" but it is outside
|
| +the latin-1 character set which will stress character encoding bugs.
|
| +'''
|
| +
|
| +import re
|
| +import types
|
| +
|
| +from grit import tclib
|
| +
|
| +
|
| +# An RFC language code for the P pseudolanguage.
|
| +PSEUDO_LANG = 'x-P-pseudo'
|
| +
|
| +# Hebrew character Qof. It looks kind of like a 'p' but is outside
|
| +# the latin-1 character set which is good for our purposes.
|
| +# TODO(joi) For now using P instead of Qof, because of some bugs it used. Find
|
| +# a better solution, i.e. one that introduces a non-latin1 character into the
|
| +# pseudotranslation.
|
| +#_QOF = u'\u05e7'
|
| +_QOF = u'P'
|
| +
|
| +# How we map each vowel.
|
| +_VOWELS = {
|
| + u'a' : u'\u00e5', # a with ring
|
| + u'e' : u'\u00e9', # e acute
|
| + u'i' : u'\u00ef', # i diaresis
|
| + u'o' : u'\u00f4', # o circumflex
|
| + u'u' : u'\u00fc', # u diaresis
|
| + u'y' : u'\u00fd', # y acute
|
| + u'A' : u'\u00c5', # A with ring
|
| + u'E' : u'\u00c9', # E acute
|
| + u'I' : u'\u00cf', # I diaresis
|
| + u'O' : u'\u00d4', # O circumflex
|
| + u'U' : u'\u00dc', # U diaresis
|
| + u'Y' : u'\u00dd', # Y acute
|
| +}
|
| +
|
| +# Matches vowels and P
|
| +_PSUB_RE = re.compile("(%s)" % '|'.join(_VOWELS.keys() + ['P']))
|
| +
|
| +
|
| +# Pseudotranslations previously created. This is important for performance
|
| +# reasons, especially since we routinely pseudotranslate the whole project
|
| +# several or many different times for each build.
|
| +_existing_translations = {}
|
| +
|
| +
|
| +def MapVowels(str, also_p = False):
|
| + '''Returns a copy of 'str' where characters that exist as keys in _VOWELS
|
| + have been replaced with the corresponding value. If also_p is true, this
|
| + function will also change capital P characters into a Hebrew character Qof.
|
| + '''
|
| + def Repl(match):
|
| + if match.group() == 'p':
|
| + if also_p:
|
| + return _QOF
|
| + else:
|
| + return 'p'
|
| + else:
|
| + return _VOWELS[match.group()]
|
| + return _PSUB_RE.sub(Repl, str)
|
| +
|
| +
|
| +def PseudoString(str):
|
| + '''Returns a pseudotranslation of the provided string, in our enhanced
|
| + P-language.'''
|
| + if str in _existing_translations:
|
| + return _existing_translations[str]
|
| +
|
| + outstr = u''
|
| + ix = 0
|
| + while ix < len(str):
|
| + if str[ix] not in _VOWELS.keys():
|
| + outstr += str[ix]
|
| + ix += 1
|
| + else:
|
| + # We want to treat consecutive vowels as one composite vowel. This is not
|
| + # always accurate e.g. in composite words but good enough.
|
| + consecutive_vowels = u''
|
| + while ix < len(str) and str[ix] in _VOWELS.keys():
|
| + consecutive_vowels += str[ix]
|
| + ix += 1
|
| + changed_vowels = MapVowels(consecutive_vowels)
|
| + outstr += changed_vowels
|
| + outstr += _QOF
|
| + outstr += changed_vowels
|
| +
|
| + _existing_translations[str] = outstr
|
| + return outstr
|
| +
|
| +
|
| +def PseudoMessage(message):
|
| + '''Returns a pseudotranslation of the provided message.
|
| +
|
| + Args:
|
| + message: tclib.Message()
|
| +
|
| + Return:
|
| + tclib.Translation()
|
| + '''
|
| + transl = tclib.Translation()
|
| +
|
| + for part in message.GetContent():
|
| + if isinstance(part, tclib.Placeholder):
|
| + transl.AppendPlaceholder(part)
|
| + else:
|
| + transl.AppendText(PseudoString(part))
|
| +
|
| + return transl
|
| +
|
|
|
| Property changes on: grit/pseudo.py
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|