OLD | NEW |
| (Empty) |
1 #!/usr/bin/env python | |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 '''Pseudotranslation support. Our pseudotranslations are based on the | |
7 P-language, which is a simple vowel-extending language. Examples of P: | |
8 - "hello" becomes "hepellopo" | |
9 - "howdie" becomes "hopowdiepie" | |
10 - "because" becomes "bepecaupause" (but in our implementation we don't | |
11 handle the silent e at the end so it actually would return "bepecaupausepe") | |
12 | |
13 The P-language has the excellent quality of increasing the length of text | |
14 by around 30-50% which is great for pseudotranslations, to stress test any | |
15 GUI layouts etc. | |
16 | |
17 To make the pseudotranslations more obviously "not a translation" and to make | |
18 them exercise any code that deals with encodings, we also transform all English | |
19 vowels into equivalent vowels with diacriticals on them (rings, acutes, | |
20 diaeresis, and circumflex), and we write the "p" in the P-language as a Hebrew | |
21 character Qof. It looks sort of like a latin character "p" but it is outside | |
22 the latin-1 character set which will stress character encoding bugs. | |
23 ''' | |
24 | |
25 from grit import lazy_re | |
26 from grit import tclib | |
27 | |
28 | |
# Language code under which the P pseudolanguage is identified.
PSEUDO_LANG = 'x-P-pseudo'

# Separator written between the two copies of each vowel group.
# The intended value is the Hebrew character Qof (u'\u05e7'): it looks kind
# of like a 'p' but is outside the latin-1 character set, which is good for
# our purposes.
# TODO(joi) For now using P instead of Qof, because of some bugs it caused.
# Find a better solution, i.e. one that introduces a non-latin1 character
# into the pseudotranslation.
_QOF = u'P'

# Replacement for each English vowel (lower case first, then upper case).
_VOWELS = {
  # Lower case.
  u'a': u'\u00e5',  # a with ring
  u'e': u'\u00e9',  # e acute
  u'i': u'\u00ef',  # i diaeresis
  u'o': u'\u00f4',  # o circumflex
  u'u': u'\u00fc',  # u diaeresis
  u'y': u'\u00fd',  # y acute
  # Upper case.
  u'A': u'\u00c5',  # A with ring
  u'E': u'\u00c9',  # E acute
  u'I': u'\u00cf',  # I diaeresis
  u'O': u'\u00d4',  # O circumflex
  u'U': u'\u00dc',  # U diaeresis
  u'Y': u'\u00dd',  # Y acute
}
55 | |
# Matches any single vowel listed in _VOWELS, or a capital 'P'.
# The keys are copied into a list before concatenation: on Python 3
# dict.keys() returns a view object, which does not support '+' with a list.
_PSUB_RE = lazy_re.compile("(%s)" % '|'.join(list(_VOWELS.keys()) + ['P']))


# Pseudotranslations previously created, keyed by the source string. This is
# important for performance reasons, especially since we routinely
# pseudotranslate the whole project several or many different times for each
# build.
_existing_translations = {}
64 | |
65 | |
def MapVowels(str, also_p=False):
  '''Returns a copy of 'str' where characters that exist as keys in _VOWELS
  have been replaced with the corresponding value.

  Args:
    str: The text to transform.
    also_p: If true, capital 'P' characters are additionally replaced with
      _QOF. If false, they are left unchanged.

  Return:
    The transformed text.
  '''
  def Repl(match):
    char = match.group()
    # _PSUB_RE matches the vowels plus capital 'P'. 'P' is not a key of
    # _VOWELS, so it must be handled before the dictionary lookup; otherwise
    # any input containing 'P' would raise KeyError.
    if char == 'P':
      return _QOF if also_p else char
    return _VOWELS[char]
  return _PSUB_RE.sub(Repl, str)
80 | |
81 | |
def PseudoString(str):
  '''Returns a pseudotranslation of the provided string, in our enhanced
  P-language.

  Every run of consecutive vowels (keys of _VOWELS) is replaced by its mapped
  form, followed by _QOF, followed by the mapped form again. All other
  characters are copied through unchanged. Results are remembered in
  _existing_translations, so repeated calls with the same string return the
  stored value.

  Args:
    str: The text to pseudotranslate.

  Return:
    The pseudotranslated text.
  '''
  if str in _existing_translations:
    return _existing_translations[str]

  # Collect the output pieces and join once at the end instead of growing a
  # string with repeated concatenation.
  pieces = []
  length = len(str)
  ix = 0
  while ix < length:
    # Membership is tested against the dict itself (a hash lookup) rather
    # than against _VOWELS.keys().
    if str[ix] not in _VOWELS:
      pieces.append(str[ix])
      ix += 1
      continue

    # We want to treat consecutive vowels as one composite vowel. This is not
    # always accurate e.g. in composite words but good enough.
    start = ix
    while ix < length and str[ix] in _VOWELS:
      ix += 1
    changed_vowels = MapVowels(str[start:ix])
    pieces.extend((changed_vowels, _QOF, changed_vowels))

  outstr = u''.join(pieces)
  _existing_translations[str] = outstr
  return outstr
108 | |
109 | |
def PseudoMessage(message):
  '''Builds the pseudotranslation of a whole message.

  Placeholder parts of the message are appended to the result as they are;
  every other part is run through PseudoString() and appended as text.

  Args:
    message: tclib.Message()

  Return:
    tclib.Translation()
  '''
  translation = tclib.Translation()

  for piece in message.GetContent():
    if not isinstance(piece, tclib.Placeholder):
      translation.AppendText(PseudoString(piece))
      continue
    translation.AppendPlaceholder(piece)

  return translation
128 | |
OLD | NEW |