| Index: grit/pseudo_rtl.py
|
| ===================================================================
|
| --- grit/pseudo_rtl.py (revision 0)
|
| +++ grit/pseudo_rtl.py (revision 0)
|
| @@ -0,0 +1,102 @@
|
| +#!/usr/bin/python
|
| +# Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +'''Pseudo RTL (aka Fake Bidi) support. It simply wraps each word with
|
| +Unicode RTL overrides.
|
| +More info at https://sites.google.com/a/chromium.org/dev/Home/fake-bidi
|
| +'''
|
| +
|
| +import re
|
| +import types
|
| +
|
| +from grit import tclib
|
| +
|
| +ACCENTED_STRINGS = {  # maps an ASCII character to its non-ASCII stand-in
|
| + 'a': u"\u00e5", 'e': u"\u00e9", 'i': u"\u00ee", 'o': u"\u00f6",
|
| + 'u': u"\u00fb", 'A': u"\u00c5", 'E': u"\u00c9", 'I': u"\u00ce",
|
| + 'O': u"\u00d6", 'U': u"\u00db", 'c': u"\u00e7", 'd': u"\u00f0",
|
| + 'n': u"\u00f1", 'p': u"\u00fe", 'y': u"\u00fd", 'C': u"\u00c7",
|
| + 'D': u"\u00d0", 'N': u"\u00d1", 'P': u"\u00de", 'Y': u"\u00dd",
|
| + 'f': u"\u0192", 's': u"\u0161", 'S': u"\u0160", 'z': u"\u017e",
|
| + 'Z': u"\u017d", 'g': u"\u011d", 'G': u"\u011c", 'h': u"\u0125",
|
| + 'H': u"\u0124", 'j': u"\u0135", 'J': u"\u0134", 'k': u"\u0137",
|
| + 'K': u"\u0136", 'l': u"\u013c", 'L': u"\u013b", 't': u"\u0163",
|
| + 'T': u"\u0162", 'w': u"\u0175", 'W': u"\u0174",
|
| + '$': u"\u20ac", '?': u"\u00bf", 'R': u"\u00ae", r'!': u"\u00a1",
|
| +}
|
| +
|
| +# A character set containing the keys in ACCENTED_STRINGS.
|
| +# We should not accent characters in an escape sequence such as "\n".
|
| +# To be safe, we assume every character following a backslash is an escaped
|
| +# character. We also need to consider the case like "\\n", which means
|
| +# a backslash and a character "n"; we will accent the character "n".
|
| +TO_ACCENT = re.compile(r'[%s]|\\[a-z\\]' % ''.join(ACCENTED_STRINGS.keys()))
|
| +
|
| +# Lex text so that we don't interfere with html tokens and entities.
|
| +# This lexing scheme will handle all well formed tags and entities, html or
|
| +# xhtml. It will not handle comments, CDATA sections, or the unescaping tags:
|
| +# script, style, xmp or listing. If any of those appear in messages,
|
| +# something is wrong.
|
| +TOKENS = [ re.compile(
|
| + '^%s' % pattern, # match at the beginning of input
|
| + re.I | re.S # html tokens are case-insensitive; re.S lets '.' match newlines
|
| + )
|
| + for pattern in
|
| + (
|
| + # a run of non html special characters
|
| + r'[^<&]+',
|
| + # a tag
|
| + (r'</?[a-z]\w*' # beginning of tag
|
| + r'(?:\s+\w+(?:\s*=\s*' # attribute start
|
| + r'(?:[^\s"\'>]+|"[^\"]*"|\'[^\']*\'))?' # attribute value
|
| + r')*\s*/?>'),
|
| + # an entity
|
| + r'&(?:[a-z]\w+|#\d+|#x[\da-f]+);',
|
| + # an html special character not part of a special sequence
|
| + r'.'
|
| + ) ]
|
| +
|
| +ALPHABETIC_RUN = re.compile(r'([^\W0-9_]+)')  # word chars minus digits and '_'
|
| +
|
| +RLO = u'\u202e'  # U+202E RIGHT-TO-LEFT OVERRIDE
|
| +PDF = u'\u202c'  # U+202C POP DIRECTIONAL FORMATTING
|
| +
|
| +def PseudoRTLString(text):
|
| + '''Returns a fake bidirectional version of the source string. This code is
|
| + based on accentString (not in this file), in turn copied from Frank Tang.
|
| + '''
|
| + parts = []
|
| + while text:
|
| + m = None
|
| + for token in TOKENS:
|
| + m = token.search(text) # patterns are '^'-anchored: matches a prefix only
|
| + if m:
|
| + part = m.group(0)
|
| + text = text[len(part):] # consume the matched prefix
|
| + if part[0] not in ('<', '&'):
|
| + # not a tag or entity, so wrap each alphabetic run in RLO ... PDF
|
| + part = ALPHABETIC_RUN.sub(lambda run: RLO + run.group() + PDF, part)
|
| + parts.append(part)
|
| + break # restart lexing the remaining text from the first pattern
|
| + return ''.join(parts)
|
| +
|
| +
|
| +def PseudoRTLMessage(message):
|
| + '''Returns a pseudo-RTL (aka Fake-Bidi) translation of the provided message.
|
| +
|
| + Args:
|
| + message: tclib.Message(); its GetContent() parts are converted in order
|
| +
|
| + Returns:
|
| + tclib.Translation() holding the converted parts
|
| + '''
|
| + transl = tclib.Translation()
|
| + for part in message.GetContent():
|
| + if isinstance(part, tclib.Placeholder):
|
| + transl.AppendPlaceholder(part) # placeholders are copied unchanged
|
| + else:
|
| + transl.AppendText(PseudoRTLString(part)) # other parts are treated as text
|
| +
|
| + return transl
|
|
|
| Property changes on: grit/pseudo_rtl.py
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|