OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python2.4 |
| 2 # Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 '''Pseudotranslation support. Our pseudotranslations are based on the |
| 7 P-language, which is a simple vowel-extending language. Examples of P: |
| 8 - "hello" becomes "hepellopo" |
| 9 - "howdie" becomes "hopowdiepie" |
| 10 - "because" becomes "bepecaupause" (but in our implementation we don't |
| 11 handle the silent e at the end so it actually would return "bepecaupausepe") |
| 12 |
| 13 The P-language has the excellent quality of increasing the length of text |
| 14 by around 30-50% which is great for pseudotranslations, to stress test any |
| 15 GUI layouts etc. |
| 16 |
| 17 To make the pseudotranslations more obviously "not a translation" and to make |
| 18 them exercise any code that deals with encodings, we also transform all English |
| 19 vowels into equivalent vowels with diacritics on them (rings, acutes, |
| 20 diaereses, and circumflexes), and we write the "p" in the P-language as a Hebrew |
| 21 character Qof. It looks sort of like a latin character "p" but it is outside |
| 22 the latin-1 character set which will stress character encoding bugs. |
| 23 ''' |
| 24 |
| 25 import re |
| 26 import types |
| 27 |
| 28 from grit import tclib |
| 29 |
| 30 |
# An RFC language code for the P pseudolanguage.
PSEUDO_LANG = 'x-P-pseudo'

# Hebrew character Qof.  It looks kind of like a 'p' but is outside
# the latin-1 character set, which is good for our purposes.
# TODO(joi) For now using P instead of Qof, because of some bugs it caused.
# Find a better solution, i.e. one that introduces a non-latin1 character into
# the pseudotranslation.
#_QOF = u'\u05e7'
_QOF = u'P'

# How we map each vowel.
_VOWELS = {
  u'a' : u'\u00e5',  # a with ring
  u'e' : u'\u00e9',  # e acute
  u'i' : u'\u00ef',  # i diaeresis
  u'o' : u'\u00f4',  # o circumflex
  u'u' : u'\u00fc',  # u diaeresis
  u'y' : u'\u00fd',  # y acute
  u'A' : u'\u00c5',  # A with ring
  u'E' : u'\u00c9',  # E acute
  u'I' : u'\u00cf',  # I diaeresis
  u'O' : u'\u00d4',  # O circumflex
  u'U' : u'\u00dc',  # U diaeresis
  u'Y' : u'\u00dd',  # Y acute
}

# Matches vowels and P.  The keys are copied into a real list before the
# concatenation because dict.keys() is only a list on Python 2; on Python 3 it
# is a view object that does not support '+'.
_PSUB_RE = re.compile("(%s)" % '|'.join(list(_VOWELS) + ['P']))


# Pseudotranslations previously created.  This is important for performance
# reasons, especially since we routinely pseudotranslate the whole project
# several or many different times for each build.
_existing_translations = {}
| 66 |
| 67 |
def MapVowels(str, also_p = False):
  '''Returns a copy of 'str' with every vowel replaced by its accented form.

  Every character that exists as a key in _VOWELS is replaced with the
  corresponding value.  Capital P characters are replaced with _QOF when
  also_p is true and are left unchanged otherwise.

  Args:
    str: the text to transform
    also_p: whether capital P characters should be turned into _QOF

  Return:
    The transformed copy of 'str'.
  '''
  def Repl(match):
    char = match.group()
    # _PSUB_RE matches capital 'P' in addition to the vowels, and 'P' is not a
    # key of _VOWELS, so it has to be handled before the table lookup.
    if char == 'P':
      if also_p:
        return _QOF
      return char
    return _VOWELS[char]
  return _PSUB_RE.sub(Repl, str)
| 82 |
| 83 |
def PseudoString(str):
  '''Returns a pseudotranslation of the provided string, in our enhanced
  P-language.

  Every run of consecutive vowels (keys of _VOWELS) is replaced by its
  MapVowels() form, followed by _QOF, followed by the MapVowels() form again.
  All other characters are copied unchanged.  Results are memoized in
  _existing_translations, keyed by the input string.

  Args:
    str: the text to pseudotranslate

  Return:
    The pseudotranslated text.
  '''
  cached = _existing_translations.get(str)
  if cached is not None:
    return cached

  pieces = []
  length = len(str)
  ix = 0
  while ix < length:
    if str[ix] not in _VOWELS:
      pieces.append(str[ix])
      ix += 1
      continue
    # We want to treat consecutive vowels as one composite vowel.  This is not
    # always accurate e.g. in composite words but good enough.
    run_start = ix
    while ix < length and str[ix] in _VOWELS:
      ix += 1
    changed_vowels = MapVowels(str[run_start:ix])
    pieces.extend([changed_vowels, _QOF, changed_vowels])

  # Joining on a unicode separator keeps the result a unicode string, as it
  # was when the output was accumulated onto u''.
  outstr = u''.join(pieces)
  _existing_translations[str] = outstr
  return outstr
| 110 |
| 111 |
def PseudoMessage(message):
  '''Builds the pseudotranslation of a whole message.

  Placeholder parts of the message are carried over as they are; every other
  part is run through PseudoString() and appended as text.

  Args:
    message: tclib.Message()

  Return:
    tclib.Translation()
  '''
  pseudo = tclib.Translation()

  for piece in message.GetContent():
    if not isinstance(piece, tclib.Placeholder):
      pseudo.AppendText(PseudoString(piece))
      continue
    pseudo.AppendPlaceholder(piece)

  return pseudo
| 130 |
OLD | NEW |