OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python |
| 2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 '''Pseudo RTL (aka Fake Bidi) support. It simply wraps each word with
| 7 Unicode RTL overrides. |
| 8 More info at https://sites.google.com/a/chromium.org/dev/Home/fake-bidi |
| 9 ''' |
| 10 |
| 11 import re |
| 12 import types |
| 13 |
| 14 from grit import tclib |
| 15 |
# Map from plain ASCII characters to visually similar accented (or otherwise
# decorated) Unicode look-alikes, used by accented pseudo-localization.
# NOTE(review): within this module only the keys are consumed (by TO_ACCENT
# below); the substitution values are presumably used by a sibling
# pseudo-translation module — confirm before removing.
ACCENTED_STRINGS = {
  'a': u"\u00e5", 'e': u"\u00e9", 'i': u"\u00ee", 'o': u"\u00f6",
  'u': u"\u00fb", 'A': u"\u00c5", 'E': u"\u00c9", 'I': u"\u00ce",
  'O': u"\u00d6", 'U': u"\u00db", 'c': u"\u00e7", 'd': u"\u00f0",
  'n': u"\u00f1", 'p': u"\u00fe", 'y': u"\u00fd", 'C': u"\u00c7",
  'D': u"\u00d0", 'N': u"\u00d1", 'P': u"\u00de", 'Y': u"\u00dd",
  'f': u"\u0192", 's': u"\u0161", 'S': u"\u0160", 'z': u"\u017e",
  'Z': u"\u017d", 'g': u"\u011d", 'G': u"\u011c", 'h': u"\u0125",
  'H': u"\u0124", 'j': u"\u0135", 'J': u"\u0134", 'k': u"\u0137",
  'K': u"\u0136", 'l': u"\u013c", 'L': u"\u013b", 't': u"\u0163",
  'T': u"\u0162", 'w': u"\u0175", 'W': u"\u0174",
  '$': u"\u20ac", '?': u"\u00bf", 'R': u"\u00ae", r'!': u"\u00a1",
}
| 29 |
# A regex matching any single character that should be accented: either one
# of the keys in ACCENTED_STRINGS, or a backslash escape sequence.
# We should not accent characters in an escape sequence such as "\n".
# To be safe, we assume every character following a backslash is an escaped
# character. We also need to consider the case like "\\n", which means
# a backslash and a character "n"; there we will accent the character "n"
# (the r'\\[a-z\\]' alternative consumes the doubled backslash first).
# NOTE(review): TO_ACCENT is not referenced elsewhere in this module —
# presumably consumed by the accented pseudo-translation code; confirm.
TO_ACCENT = re.compile(r'[%s]|\\[a-z\\]' % ''.join(ACCENTED_STRINGS.keys()))
| 36 |
# Lex text so that we don't interfere with html tokens and entities.
# This lexing scheme will handle all well formed tags and entities, html or
# xhtml. It will not handle comments, CDATA sections, or the unescaping tags:
# script, style, xmp or listing. If any of those appear in messages,
# something is wrong.
#
# Every pattern is anchored at the beginning of the input with '^',
# compiled case-insensitively, and with '.' also matching newlines (re.S).
TOKENS = [
  re.compile('^' + pattern, re.I | re.S)
  for pattern in (
    # a run of non html special characters
    r'[^<&]+',
    # a tag
    (r'</?[a-z]\w*'                           # beginning of tag
     r'(?:\s+\w+(?:\s*=\s*'                   # attribute start
     r'(?:[^\s"\'>]+|"[^\"]*"|\'[^\']*\'))?'  # attribute value
     r')*\s*/?>'),
    # an entity
    r'&(?:[a-z]\w+|#\d+|#x[\da-f]+);',
    # a single html special character not part of a special sequence;
    # always matches, so the lexer is guaranteed to make progress
    r'.',
  )
]

# A maximal run of alphabetic characters (excludes digits and underscore).
ALPHABETIC_RUN = re.compile(r'([^\W0-9_]+)')

# Unicode bidi controls: RIGHT-TO-LEFT OVERRIDE and POP DIRECTIONAL
# FORMATTING, used to bracket each alphabetic run.
RLO = u'\u202e'
PDF = u'\u202c'

def PseudoRTLString(text):
  '''Returns a fake bidirectional version of the source string.

  Every alphabetic run that is not inside an HTML/XHTML tag or entity is
  wrapped in a RIGHT-TO-LEFT OVERRIDE ... POP DIRECTIONAL FORMATTING pair;
  tags and entities pass through untouched.

  Args:
    text: the source string, possibly containing HTML markup.

  Return:
    the pseudo-RTL string.
  '''
  pieces = []
  remaining = text
  while remaining:
    for lexer in TOKENS:
      found = lexer.search(remaining)
      if not found:
        continue
      chunk = found.group(0)
      remaining = remaining[len(chunk):]
      # A chunk starting with '<' or '&' is a tag or entity and must not be
      # altered; anything else gets its alphabetic runs bracketed.
      if chunk[0] not in ('<', '&'):
        chunk = ALPHABETIC_RUN.sub(lambda run: RLO + run.group() + PDF, chunk)
      pieces.append(chunk)
      break
  return ''.join(pieces)
| 84 |
| 85 |
def PseudoRTLMessage(message):
  '''Returns a pseudo-RTL (aka Fake-Bidi) translation of the provided message.

  Placeholders are carried over unchanged; only the literal text parts are
  run through PseudoRTLString.

  Args:
    message: tclib.Message()

  Return:
    tclib.Translation()
  '''
  translation = tclib.Translation()
  for piece in message.GetContent():
    if isinstance(piece, tclib.Placeholder):
      translation.AppendPlaceholder(piece)
      continue
    translation.AppendText(PseudoRTLString(piece))
  return translation
OLD | NEW |