grit/pseudo.py - Issue 7994004: Initial source commit to grit-i18n project.

Unified Diff: grit/pseudo.py

Issue 7994004: Initial source commit to grit-i18n project. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/

Patch Set: Created 9 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: grit/pseudo.py

===================================================================

--- grit/pseudo.py (revision 0)

+++ grit/pseudo.py (revision 0)

@@ -0,0 +1,130 @@

+#!/usr/bin/python2.4

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+'''Pseudotranslation support. Our pseudotranslations are based on the

+P-language, which is a simple vowel-extending language. Examples of P:

+ - "hello" becomes "hepellopo"

+ - "howdie" becomes "hopowdiepie"

+ - "because" becomes "bepecaupause" (but in our implementation we don't

+ handle the silent e at the end so it actually would return "bepecaupausepe"

+The P-language has the excellent quality of increasing the length of text

+by around 30-50% which is great for pseudotranslations, to stress test any

+GUI layouts etc.

+To make the pseudotranslations more obviously "not a translation" and to make

+them exercise any code that deals with encodings, we also transform all English

+vowels into equivalent vowels with diacriticals on them (rings, acutes,

+diaresis, and circumflex), and we write the "p" in the P-language as a Hebrew

+character Qof. It looks sort of like a latin character "p" but it is outside

+the latin-1 character set which will stress character encoding bugs.

+'''

+import re

+import types

+from grit import tclib

+# An RFC language code for the P pseudolanguage.

+PSEUDO_LANG = 'x-P-pseudo'

+# Hebrew character Qof. It looks kind of like a 'p' but is outside

+# the latin-1 character set which is good for our purposes.

+# TODO(joi) For now using P instead of Qof, because of some bugs it used. Find

+# a better solution, i.e. one that introduces a non-latin1 character into the

+# pseudotranslation.

+#_QOF = u'\u05e7'

+_QOF = u'P'

+# How we map each vowel.

+_VOWELS = {

+ u'a' : u'\u00e5', # a with ring

+ u'e' : u'\u00e9', # e acute

+ u'i' : u'\u00ef', # i diaresis

+ u'o' : u'\u00f4', # o circumflex

+ u'u' : u'\u00fc', # u diaresis

+ u'y' : u'\u00fd', # y acute

+ u'A' : u'\u00c5', # A with ring

+ u'E' : u'\u00c9', # E acute

+ u'I' : u'\u00cf', # I diaresis

+ u'O' : u'\u00d4', # O circumflex

+ u'U' : u'\u00dc', # U diaresis

+ u'Y' : u'\u00dd', # Y acute

+# Matches vowels and P

+_PSUB_RE = re.compile("(%s)" % '|'.join(_VOWELS.keys() + ['P']))

+# Pseudotranslations previously created. This is important for performance

+# reasons, especially since we routinely pseudotranslate the whole project

+# several or many different times for each build.

+_existing_translations = {}

+def MapVowels(str, also_p = False):

+ '''Returns a copy of 'str' where characters that exist as keys in _VOWELS

+ have been replaced with the corresponding value. If also_p is true, this

+ function will also change capital P characters into a Hebrew character Qof.

+ '''

+ def Repl(match):

+ if match.group() == 'p':

+ if also_p:

+ return _QOF

+ else:

+ return 'p'

+ else:

+ return _VOWELS[match.group()]

+ return _PSUB_RE.sub(Repl, str)

+def PseudoString(str):

+ '''Returns a pseudotranslation of the provided string, in our enhanced

+ P-language.'''

+ if str in _existing_translations:

+ return _existing_translations[str]

+ outstr = u''

+ ix = 0

+ while ix < len(str):

+ if str[ix] not in _VOWELS.keys():

+ outstr += str[ix]

+ ix += 1

+ else:

+ # We want to treat consecutive vowels as one composite vowel. This is not

+ # always accurate e.g. in composite words but good enough.

+ consecutive_vowels = u''

+ while ix < len(str) and str[ix] in _VOWELS.keys():

+ consecutive_vowels += str[ix]

+ ix += 1

+ changed_vowels = MapVowels(consecutive_vowels)

+ outstr += changed_vowels

+ outstr += _QOF

+ outstr += changed_vowels

+ _existing_translations[str] = outstr

+ return outstr

+def PseudoMessage(message):

+ '''Returns a pseudotranslation of the provided message.

+ Args:

+ message: tclib.Message()

+ Return:

+ tclib.Translation()

+ '''

+ transl = tclib.Translation()

+ for part in message.GetContent():

+ if isinstance(part, tclib.Placeholder):

+ transl.AppendPlaceholder(part)

+ else:

+ transl.AppendText(PseudoString(part))

+ return transl

Property changes on: grit/pseudo.py

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « grit/node/variant.py ('k') | grit/pseudo_rtl.py » ('j') | no next file with comments »