OLD | NEW |
| (Empty) |
1 #!/usr/bin/env python | |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 '''Collections of messages and their translations, called cliques. Also | |
7 collections of cliques (uber-cliques). | |
8 ''' | |
9 | |
10 import re | |
11 import types | |
12 | |
13 from grit import constants | |
14 from grit import exception | |
15 from grit import lazy_re | |
16 from grit import pseudo | |
17 from grit import pseudo_rtl | |
18 from grit import tclib | |
19 | |
20 | |
class UberClique(object):
  '''A factory (NOT a singleton factory) for making cliques. It also offers
  several methods for querying and reporting on the cliques it has created.
  '''

  def __init__(self):
    # A map from message ID to list of cliques whose source messages have
    # that ID.  This will contain all cliques created using this factory.
    # Different messages can have the same ID because they have the
    # same translateable portion and placeholder names, but occur in different
    # places in the resource tree.
    #
    # Each list of cliques is kept sorted by description, to achieve
    # stable results from the BestClique method, see below.
    self.cliques_ = {}

    # A map of clique IDs to a dict (used as a set) of languages, to indicate
    # translations where we fell back to English.
    self.fallback_translations_ = {}

    # A map of clique IDs to a dict (used as a set) of languages, to indicate
    # missing translations.
    self.missing_translations_ = {}

  def _AddMissingTranslation(self, lang, clique, is_error):
    '''Records that 'clique' has no translation into 'lang'.

    Args:
      lang: 'fr'
      clique: the clique lacking the translation; only its GetId() is used
      is_error: if true the entry is recorded as a missing translation,
                otherwise as a fallback to English
    '''
    if is_error:
      registry = self.missing_translations_
    else:
      registry = self.fallback_translations_
    # The inner dict is only used as a set of language codes; the value 1 is
    # a placeholder.
    registry.setdefault(clique.GetId(), {})[lang] = 1

  def HasMissingTranslations(self):
    '''Returns True if at least one missing translation (as opposed to a
    fallback) has been recorded.
    '''
    return bool(self.missing_translations_)

  def MissingTranslationsReport(self):
    '''Returns a string suitable for printing to report missing
    and fallback translations to the user.

    Message IDs and language codes are listed in sorted order so that the
    report is identical from run to run.  The result is the empty string if
    nothing has been recorded.
    '''
    def ReportTranslation(clique, langs):
      '''Formats one report line for 'clique'; 'langs' is a list of language
      codes.'''
      text = clique.GetMessage().GetPresentableContent()
      # The text 'error' (usually 'Error:' but we are conservative)
      # can trigger some build environments (Visual Studio, we're
      # looking at you) to consider invocation of grit to have failed,
      # so we make sure never to output that word.  The second slice is
      # needed because the replacement is longer than the word it replaces.
      extract = re.sub('(?i)error', 'REDACTED', text[0:40])[0:40]
      ellipsis = '...' if len(text) > 40 else ''
      describe_langs = ','.join(langs[0:6])
      if len(langs) > 6:
        describe_langs += " and %d more" % (len(langs) - 6)
      return " %s \"%s%s\" %s" % (clique.GetId(), extract, ellipsis,
                                  describe_langs)

    sections = (
        ("WARNING: Fell back to English for the following translations:",
         self.fallback_translations_),
        ("ERROR: The following translations are MISSING:",
         self.missing_translations_),
    )
    lines = []
    for heading, registry in sections:
      if not registry:
        continue
      lines.append(heading)
      for clique_id in sorted(registry):
        # sorted() turns the dict-used-as-set into a real, ordered list so it
        # can be sliced by ReportTranslation.
        lines.append(ReportTranslation(self.cliques_[clique_id][0],
                                       sorted(registry[clique_id])))
    return '\n'.join(lines)

  def MakeClique(self, message, translateable=True):
    '''Create a new clique initialized with a message, and register it under
    the message's ID.

    Args:
      message: tclib.Message()
      translateable: True | False

    Return:
      The newly created MessageClique.
    '''
    clique = MessageClique(self, message, translateable)

    # Enable others to find this clique by its message ID
    message_id = message.GetId()
    siblings = self.cliques_.get(message_id)
    if siblings is None:
      self.cliques_[message_id] = [clique]
      return clique

    if not message.HasAssignedId():
      # Cliques sharing an ID that was not explicitly assigned are expected
      # to have the same presentable content.
      presentable_text = clique.GetMessage().GetPresentableContent()
      for c in siblings:
        assert c.GetMessage().GetPresentableContent() == presentable_text
    siblings.append(clique)
    # We need to keep each list of cliques sorted by description, to
    # achieve stable results from the BestClique method, see below.
    siblings.sort(key=lambda c: c.GetMessage().GetDescription())
    return clique

  def FindCliqueAndAddTranslation(self, translation, language):
    '''Adds the specified translation to every clique whose source message
    it is a translation of.

    Args:
      translation: tclib.Translation()
      language: 'en' | 'fr' ...

    Return:
      True if the source message was found, otherwise false.
    '''
    targets = self.cliques_.get(translation.GetId())
    if targets is None:
      return False
    for clique in targets:
      clique.AddTranslation(translation, language)
    return True

  def BestClique(self, id):
    '''Returns the "best" clique from a list of cliques.  All the cliques
    must have the same ID.  The "best" clique is chosen in the following
    order of preference:
    - The first clique that has a non-ID-based description.
    - If no such clique found, the first clique with an ID-based description.
    - Otherwise the first clique.

    This method is stable in terms of always returning a clique with
    an identical description (on different runs of GRIT on the same
    data) because self.cliques_ is sorted by description.

    Args:
      id: message ID; must be a key of self.cliques_ (KeyError otherwise)
    '''
    # NOTE: the parameter keeps its historical name 'id' (shadowing the
    # builtin) so that keyword callers continue to work.
    clique_list = self.cliques_[id]
    first_id_based = None
    for clique in clique_list:
      description = clique.GetMessage().GetDescription()
      if not description:
        continue
      if not description.startswith('ID:'):
        # this is the preferred case so we exit right away
        return clique
      if first_id_based is None:
        first_id_based = clique
    if first_id_based is not None:
      return first_id_based
    if clique_list:
      return clique_list[0]
    return None

  def BestCliquePerId(self):
    '''Iterates over the list of all cliques and returns the best clique for
    each ID, as chosen by BestClique().
    '''
    for message_id in self.cliques_:
      yield self.BestClique(message_id)

  def BestCliqueByOriginalText(self, text, meaning):
    '''Finds the "best" (as in BestClique()) clique that has original text
    'text' and meaning 'meaning'.

    Return:
      The source message of that clique (the message object, not the clique
      itself), or None if there is no such clique.
    '''
    # If needed, this can be optimized by maintaining a map of
    # fingerprints of original text+meaning to cliques.
    for c in self.BestCliquePerId():
      msg = c.GetMessage()
      if msg.GetRealContent() == text and msg.GetMeaning() == meaning:
        return msg
    return None

  def AllMessageIds(self):
    '''Returns a list of all defined message IDs.
    '''
    return list(self.cliques_)

  def AllCliques(self):
    '''Iterates over all cliques.  Note that this can return multiple cliques
    with the same ID.
    '''
    for cliques in self.cliques_.values():
      for c in cliques:
        yield c

  def GenerateXtbParserCallback(self, lang, debug=False):
    '''Creates a callback function as required by grit.xtb_reader.Parse().
    This callback will create Translation objects for each message from
    the XTB that exists in this uberclique, and add them as translations for
    the relevant cliques.  The callback will add translations to the language
    specified by 'lang'

    Args:
      lang: 'fr'
      debug: True | False

    Return:
      A function taking (id, structure), where 'structure' is iterated as
      (is_placeholder, text) pairs.  It raises
      exception.MismatchingPlaceholders if a placeholder in 'structure' has
      no counterpart in the original message.
    '''
    def Callback(id, structure):
      if id not in self.cliques_:
        if debug:
          print("Ignoring translation #%s" % id)
        return

      if debug:
        print("Adding translation #%s" % id)

      # We fetch placeholder information from the original message (the XTB
      # file only contains placeholder names).
      original_msg = self.BestClique(id).GetMessage()

      translation = tclib.Translation(id=id)
      for is_ph, text in structure:
        if not is_ph:
          translation.AppendText(text)
          continue
        for ph in original_msg.GetPlaceholders():
          if ph.GetPresentation() == text:
            translation.AppendPlaceholder(tclib.Placeholder(
                ph.GetPresentation(), ph.GetOriginal(), ph.GetExample()))
            break
        else:
          # The loop ended without finding a placeholder with that name.
          raise exception.MismatchingPlaceholders(
              'Translation for message ID %s had <ph name="%s"/>, no match\n'
              'in original message' % (id, text))
      self.FindCliqueAndAddTranslation(translation, lang)
    return Callback
237 | |
238 | |
class CustomType(object):
  '''A base class you should implement if you wish to specify a custom type
  for a message clique (i.e. custom validation and optional modification of
  translations).'''

  # The types whose instances count as textual parts of a translation.
  # types.StringTypes only exists on Python 2; elsewhere plain str is used.
  _TEXT_TYPES = getattr(types, 'StringTypes', (str,))

  def Validate(self, message):
    '''Returns true if the message (a tclib.Message object) is valid,
    otherwise false.  Must be overridden; the base implementation raises
    NotImplementedError.
    '''
    raise NotImplementedError()

  def ValidateAndModify(self, lang, translation):
    '''Returns true if the translation (a tclib.Translation object) is valid,
    otherwise false.  The language is also passed in.  This method may modify
    the translation that is passed in, if it so wishes.  Must be overridden;
    the base implementation raises NotImplementedError.
    '''
    raise NotImplementedError()

  def ModifyTextPart(self, lang, text):
    '''If you call ModifyEachTextPart, it will turn around and call this method
    for each text part of the translation.  You should return the modified
    version of the text, or just the original text to not change anything.
    Must be overridden if ModifyEachTextPart is used; the base implementation
    raises NotImplementedError.
    '''
    raise NotImplementedError()

  def ModifyEachTextPart(self, lang, translation):
    '''Call this to easily modify one or more of the textual parts of a
    translation.  It will call ModifyTextPart for each part of the
    translation that is a string, and store the result back in place in the
    list returned by translation.GetContent().  Non-string parts are left
    untouched.
    '''
    contents = translation.GetContent()
    for ix, part in enumerate(contents):
      if isinstance(part, self._TEXT_TYPES):
        # Replacing an element by index while enumerating is safe; the list
        # length never changes.
        contents[ix] = self.ModifyTextPart(lang, part)
273 | |
274 | |
class OneOffCustomType(CustomType):
  '''A very simple custom type whose validation is a single Python expression,
  applied to all languages including the source language.

  The expression is evaluated with three names available: 'lang' (the
  language of 'msg'), 'msg' (the message or translation being validated) and
  'text' (a zero-argument function returning the real contents of 'msg', as
  shorthand).  The value of the expression is the validation result.
  '''

  def __init__(self, expression):
    # The expression source text; it is evaluated anew on every validation.
    self.expr = expression

  def Validate(self, message):
    '''Validates a source message by treating it as a translation into the
    source language.'''
    source_lang = MessageClique.source_language
    return self.ValidateAndModify(source_lang, message)

  def ValidateAndModify(self, lang, msg):
    '''Evaluates the stored expression against 'msg' and returns its value.'''
    def text():
      return msg.GetRealContent()

    scope = {
      'lang' : lang,
      'text' : text,
      'msg' : msg,
    }
    # NOTE(review): the expression is handed to eval(); confirm that it can
    # only ever originate from trusted input.
    return eval(self.expr, {}, scope)
294 | |
295 | |
class MessageClique(object):
  '''A source message together with all of its translations, plus the code
  that matches translations up with their original message.'''

  # change this to the language code of Messages you add to cliques_.
  # TODO(joi) Actually change this based on the <grit> node's source language
  source_language = 'en'

  # A constant translation we use when asked for a translation into the
  # special language constants.CONSTANT_LANGUAGE.
  CONSTANT_TRANSLATION = tclib.Translation(text='TTTTTT')

  # A pattern to match messages that are empty or whitespace only.
  WHITESPACE_MESSAGE = lazy_re.compile(u'^\s*$')

  def __init__(self, uber_clique, message, translateable=True,
               custom_type=None):
    '''Create a new clique initialized with just a message.

    Note that messages with a body comprised only of whitespace will implicitly
    be marked non-translatable.

    Args:
      uber_clique: Our uber-clique (collection of cliques)
      message: tclib.Message()
      translateable: True | False
      custom_type: instance of clique.CustomType interface
    '''
    # Our parent
    self.uber_clique = uber_clique

    # If not translateable, we only store the original message.  Messages
    # whose body is empty or whitespace-only are implicitly treated as
    # non-translateable, whatever the caller asked for.
    self.translateable = translateable
    is_blank = MessageClique.WHITESPACE_MESSAGE.match(message.GetRealContent())
    if is_blank:
      self.translateable = False

    # A mapping of language identifiers to tclib.BaseMessage and its
    # subclasses (i.e. tclib.Message and tclib.Translation).
    self.clique = { MessageClique.source_language : message }

    # A list of the "shortcut groups" this clique is
    # part of.  Within any given shortcut group, no shortcut key (e.g. &J)
    # must appear more than once in each language for all cliques that
    # belong to the group.
    self.shortcut_groups = []

    # An instance of the CustomType interface, or None.  If this is set, it
    # will be used to validate the original message and translations thereof,
    # and will also get a chance to modify translations of the message.
    self.SetCustomType(custom_type)

  def GetMessage(self):
    '''Returns the tclib.Message that is the source for this clique.'''
    return self.clique[MessageClique.source_language]

  def GetId(self):
    '''Returns the message ID of the messages in this clique.'''
    source_message = self.GetMessage()
    return source_message.GetId()

  def IsTranslateable(self):
    '''Returns whether translations are stored for this clique.'''
    return self.translateable

  def AddToShortcutGroup(self, group):
    '''Records 'group' as one of the shortcut groups this clique is part of.'''
    self.shortcut_groups.append(group)

  def SetCustomType(self, custom_type):
    '''Makes this clique use custom_type for validating messages and
    translations, and optionally modifying translations.

    Raises exception.InvalidMessage if custom_type rejects the source message;
    the custom type has already been stored on the clique at that point.
    '''
    self.custom_type = custom_type
    if not custom_type:
      return
    if not custom_type.Validate(self.GetMessage()):
      raise exception.InvalidMessage(self.GetMessage().GetRealContent())

  def MessageForLanguage(self, lang, pseudo_if_no_match=True,
                         fallback_to_english=False):
    '''Returns the message/translation for the specified language, providing
    a pseudotranslation if there is no available translation and a pseudo-
    translation is requested.

    The translation of any message whatsoever in the special language
    'x_constant' is the message "TTTTTT".

    Args:
      lang: 'en'
      pseudo_if_no_match: True
      fallback_to_english: False

    Return:
      tclib.BaseMessage
    '''
    if not self.translateable:
      return self.GetMessage()

    if lang == constants.CONSTANT_LANGUAGE:
      return self.CONSTANT_TRANSLATION

    # An exact match among the stored languages wins.
    for known_lang, stored in self.clique.items():
      if known_lang == lang:
        return stored

    if lang == constants.FAKE_BIDI:
      return pseudo_rtl.PseudoRTLMessage(self.GetMessage())

    if fallback_to_english:
      self.uber_clique._AddMissingTranslation(lang, self, is_error=False)
      return self.GetMessage()

    # If we're not supposed to generate pseudotranslations, we add an error
    # report to a list of errors, then fail at a higher level, so that we
    # get a list of all messages that are missing translations.  A
    # pseudotranslation is still returned in that case.
    if not pseudo_if_no_match:
      self.uber_clique._AddMissingTranslation(lang, self, is_error=True)

    return pseudo.PseudoMessage(self.GetMessage())

  def AllMessagesThatMatch(self, lang_re, include_pseudo = True):
    '''Returns a map of all messages that match 'lang', including the pseudo
    translation if requested.

    Args:
      lang_re: re.compile('fr|en')
      include_pseudo: True

    Return:
      { 'en' : tclib.Message,
        'fr' : tclib.Translation,
        pseudo.PSEUDO_LANG : tclib.Translation }

      NOTE(review): for a non-translateable clique the result is instead a
      one-element list holding the source message, not a map.
    '''
    if not self.translateable:
      return [self.GetMessage()]

    matches = dict((known_lang, stored)
                   for known_lang, stored in self.clique.items()
                   if lang_re.match(known_lang))

    if include_pseudo:
      matches[pseudo.PSEUDO_LANG] = pseudo.PseudoMessage(self.GetMessage())

    return matches

  def AddTranslation(self, translation, language):
    '''Add a translation to this clique.  The translation must have the same
    ID as the message that is the source for this clique.

    If this clique is not translateable, the function just returns.

    Args:
      translation: tclib.Translation()
      language: 'en'

    Throws:
      grit.exception.InvalidTranslation if the translation you're trying to add
      doesn't have the same message ID as the source message of this clique.
      AssertionError if the language already has an entry in this clique, or
      if the number of placeholders differs from the source message.
    '''
    if not self.translateable:
      return

    if translation.GetId() != self.GetId():
      details = 'Msg ID %s, transl ID %s' % (self.GetId(), translation.GetId())
      raise exception.InvalidTranslation(details)

    assert language not in self.clique

    # Because two messages can differ in the original content of their
    # placeholders yet share the same ID (because they are otherwise the
    # same), the translation we are getting may have different original
    # content for placeholders than our message, yet it is still the right
    # translation for our message (because it is for the same ID).  We must
    # therefore fetch the original content of placeholders from our original
    # English message.
    #
    # See grit.clique_unittest.MessageCliqueUnittest.testSemiIdenticalCliques
    # for a concrete explanation of why this is necessary.

    original = self.MessageForLanguage(self.source_language, False)
    expected_count = len(original.GetPlaceholders())
    actual_count = len(translation.GetPlaceholders())
    if expected_count != actual_count:
      print("ERROR: '%s' translation of message id %s does not match" %
            (language, translation.GetId()))
      assert False

    transl_msg = tclib.Translation(id=self.GetId(),
                                   text=translation.GetPresentableContent(),
                                   placeholders=original.GetPlaceholders())

    # A failed custom validation is only reported; the translation is stored
    # regardless.
    if self.custom_type:
      if not self.custom_type.ValidateAndModify(language, transl_msg):
        print("WARNING: %s translation failed validation: %s" % (
            language, transl_msg.GetId()))

    self.clique[language] = transl_msg
483 | |
OLD | NEW |