Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(75)

Side by Side Diff: grit/pseudo.py

Issue 7994004: Initial source commit to grit-i18n project. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/
Patch Set: Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « grit/node/variant.py ('k') | grit/pseudo_rtl.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 #!/usr/bin/python2.4
2 # Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 '''Pseudotranslation support. Our pseudotranslations are based on the
7 P-language, which is a simple vowel-extending language. Examples of P:
8 - "hello" becomes "hepellopo"
9 - "howdie" becomes "hopowdiepie"
10 - "because" becomes "bepecaupause" (but in our implementation we don't
11 handle the silent e at the end, so it would actually return "bepecaupausepe")
12
13 The P-language has the excellent quality of increasing the length of text
14 by around 30-50% which is great for pseudotranslations, to stress test any
15 GUI layouts etc.
16
17 To make the pseudotranslations more obviously "not a translation" and to make
18 them exercise any code that deals with encodings, we also transform all English
19 vowels into equivalent vowels with diacriticals on them (rings, acutes,
20 diaeresis, and circumflex), and we write the "p" in the P-language as a Hebrew
21 character Qof. It looks sort of like a latin character "p" but it is outside
22 the latin-1 character set which will stress character encoding bugs.
23 '''
24
25 import re
26 import types
27
28 from grit import tclib
29
30
# An RFC language code for the P pseudolanguage.
PSEUDO_LANG = 'x-P-pseudo'

# Hebrew character Qof. It looks kind of like a 'p' but is outside
# the latin-1 character set which is good for our purposes.
# TODO(joi) For now using P instead of Qof, because of some bugs it used. Find
# a better solution, i.e. one that introduces a non-latin1 character into the
# pseudotranslation.
#_QOF = u'\u05e7'
_QOF = u'P'

# How we map each vowel: every plain English vowel (both cases) is replaced by
# a latin-1 vowel that carries a diacritical mark.
_VOWELS = {
  u'a' : u'\u00e5',  # a with ring
  u'e' : u'\u00e9',  # e acute
  u'i' : u'\u00ef',  # i diaeresis
  u'o' : u'\u00f4',  # o circumflex
  u'u' : u'\u00fc',  # u diaeresis
  u'y' : u'\u00fd',  # y acute
  u'A' : u'\u00c5',  # A with ring
  u'E' : u'\u00c9',  # E acute
  u'I' : u'\u00cf',  # I diaeresis
  u'O' : u'\u00d4',  # O circumflex
  u'U' : u'\u00dc',  # U diaeresis
  u'Y' : u'\u00dd',  # Y acute
}

# Matches vowels and P.  list(_VOWELS) is used rather than _VOWELS.keys() so
# that the concatenation with ['P'] also works where keys() returns a view
# instead of a list.
_PSUB_RE = re.compile("(%s)" % '|'.join(list(_VOWELS) + ['P']))


# Pseudotranslations previously created.  This is important for performance
# reasons, especially since we routinely pseudotranslate the whole project
# several or many different times for each build.
_existing_translations = {}
66
67
def MapVowels(str, also_p = False):
  '''Returns a copy of 'str' where characters that exist as keys in _VOWELS
  have been replaced with the corresponding value.  If also_p is true, this
  function will also change capital P characters into _QOF (the stand-in for
  the Hebrew character Qof); otherwise capital P characters are left as they
  are.

  Args:
    str: the text to transform
    also_p: whether capital 'P' should be replaced with _QOF

  Return:
    The transformed text.
  '''
  def Repl(match):
    char = match.group()
    if char == 'P':
      # _PSUB_RE matches a capital 'P' in addition to the vowels, and 'P' is
      # not a key of _VOWELS, so it has to be handled here before the lookup.
      if also_p:
        return _QOF
      return char
    return _VOWELS[char]
  return _PSUB_RE.sub(Repl, str)
82
83
def PseudoString(str):
  '''Returns a pseudotranslation of the provided string, in our enhanced
  P-language.

  Every run of consecutive vowels is replaced by its MapVowels() form, followed
  by _QOF, followed by the MapVowels() form again.  All other characters are
  copied through untouched.  Results are memoized in _existing_translations,
  keyed by the input string.

  Args:
    str: the text to pseudotranslate

  Return:
    The pseudotranslated text.
  '''
  if str in _existing_translations:
    return _existing_translations[str]

  pieces = []
  length = len(str)
  ix = 0
  while ix < length:
    # Membership is tested against the dict itself; this avoids building and
    # scanning a list of keys for every character.
    if str[ix] not in _VOWELS:
      pieces.append(str[ix])
      ix += 1
      continue

    # We want to treat consecutive vowels as one composite vowel.  This is not
    # always accurate e.g. in composite words but good enough.
    start = ix
    while ix < length and str[ix] in _VOWELS:
      ix += 1
    changed_vowels = MapVowels(str[start:ix])
    pieces.extend((changed_vowels, _QOF, changed_vowels))

  # Joining on a unicode literal keeps the result a unicode string, just like
  # the accumulator that used to start out as u''.
  outstr = u''.join(pieces)
  _existing_translations[str] = outstr
  return outstr
110
111
def PseudoMessage(message):
  '''Builds a pseudotranslated tclib.Translation from a message.

  Placeholder parts of the message content are appended to the translation
  as they are; every other part is run through PseudoString() and appended
  as text.

  Args:
    message: tclib.Message()

  Return:
    tclib.Translation()
  '''
  translation = tclib.Translation()

  for piece in message.GetContent():
    if not isinstance(piece, tclib.Placeholder):
      translation.AppendText(PseudoString(piece))
      continue
    translation.AppendPlaceholder(piece)

  return translation
130
OLDNEW
« no previous file with comments | « grit/node/variant.py ('k') | grit/pseudo_rtl.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698