grit/pseudo.py - Issue 7994004: Initial source commit to grit-i18n project.

Side by Side Diff: grit/pseudo.py

Issue 7994004: Initial source commit to grit-i18n project. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/

Patch Set: Created 9 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 #!/usr/bin/python2.4

	2 # Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6 '''Pseudotranslation support. Our pseudotranslations are based on the

	7 P-language, which is a simple vowel-extending language. Examples of P:

	8 - "hello" becomes "hepellopo"

	9 - "howdie" becomes "hopowdiepie"

	10 - "because" becomes "bepecaupause" (but in our implementation we don't

	11 handle the silent e at the end so it actually would return "bepecaupausepe")

	12

	13 The P-language has the excellent quality of increasing the length of text

	14 by around 30-50% which is great for pseudotranslations, to stress test any

	15 GUI layouts etc.

	16

	17 To make the pseudotranslations more obviously "not a translation" and to make

	18 them exercise any code that deals with encodings, we also transform all English

	19 vowels into equivalent vowels with diacriticals on them (rings, acutes,

	20 diaeresis, and circumflex), and we write the "p" in the P-language as a Hebrew

	21 character Qof. It looks sort of like a latin character "p" but it is outside

	22 the latin-1 character set which will stress character encoding bugs.

	23 '''

	24

	25 import re

	26 import types

	27

	28 from grit import tclib

	29

	30

	31 # An RFC language code for the P pseudolanguage.

	32 PSEUDO_LANG = 'x-P-pseudo'

	33

	34 # Hebrew character Qof. It looks kind of like a 'p' but is outside

	35 # the latin-1 character set which is good for our purposes.

	36 # TODO(joi) For now using P instead of Qof, because of some bugs it used. Find

	37 # a better solution, i.e. one that introduces a non-latin1 character into the

	38 # pseudotranslation.

	39 #_QOF = u'\u05e7'

	40 _QOF = u'P'

	41

	42 # How we map each vowel.

	43 _VOWELS = {

	44 u'a' : u'\u00e5', # a with ring

	45 u'e' : u'\u00e9', # e acute

	46 u'i' : u'\u00ef', # i diaresis

	47 u'o' : u'\u00f4', # o circumflex

	48 u'u' : u'\u00fc', # u diaresis

	49 u'y' : u'\u00fd', # y acute

	50 u'A' : u'\u00c5', # A with ring

	51 u'E' : u'\u00c9', # E acute

	52 u'I' : u'\u00cf', # I diaresis

	53 u'O' : u'\u00d4', # O circumflex

	54 u'U' : u'\u00dc', # U diaresis

	55 u'Y' : u'\u00dd', # Y acute

	56 }

	57

	58 # Matches vowels and P

	59 _PSUB_RE = re.compile("(%s)" % '\|'.join(_VOWELS.keys() + ['P']))

	60

	61

	62 # Pseudotranslations previously created. This is important for performance

	63 # reasons, especially since we routinely pseudotranslate the whole project

	64 # several or many different times for each build.

	65 _existing_translations = {}

	66

	67

	68 def MapVowels(str, also_p = False):

	69 '''Returns a copy of 'str' where characters that exist as keys in _VOWELS

	70 have been replaced with the corresponding value. If also_p is true, this

	71 function will also change capital P characters into a Hebrew character Qof.

	72 '''

	73 def Repl(match):

	74 if match.group() == 'p':

	75 if also_p:

	76 return _QOF

	77 else:

	78 return 'p'

	79 else:

	80 return _VOWELS[match.group()]

	81 return _PSUB_RE.sub(Repl, str)

	82

	83

	84 def PseudoString(str):

	85 '''Returns a pseudotranslation of the provided string, in our enhanced

	86 P-language.'''

	87 if str in _existing_translations:

	88 return _existing_translations[str]

	89

	90 outstr = u''

	91 ix = 0

	92 while ix < len(str):

	93 if str[ix] not in _VOWELS.keys():

	94 outstr += str[ix]

	95 ix += 1

	96 else:

	97 # We want to treat consecutive vowels as one composite vowel. This is not

	98 # always accurate e.g. in composite words but good enough.

	99 consecutive_vowels = u''

	100 while ix < len(str) and str[ix] in _VOWELS.keys():

	101 consecutive_vowels += str[ix]

	102 ix += 1

	103 changed_vowels = MapVowels(consecutive_vowels)

	104 outstr += changed_vowels

	105 outstr += _QOF

	106 outstr += changed_vowels

	107

	108 _existing_translations[str] = outstr

	109 return outstr

	110

	111

	112 def PseudoMessage(message):

	113 '''Returns a pseudotranslation of the provided message.

	114

	115 Args:

	116 message: tclib.Message()

	117

	118 Return:

	119 tclib.Translation()

	120 '''

	121 transl = tclib.Translation()

	122

	123 for part in message.GetContent():

	124 if isinstance(part, tclib.Placeholder):

	125 transl.AppendPlaceholder(part)

	126 else:

	127 transl.AppendText(PseudoString(part))

	128

	129 return transl

	130

OLD	NEW

« no previous file with comments | « grit/node/variant.py ('k') | grit/pseudo_rtl.py » ('j') | no next file with comments »