grit/pseudo_rtl.py - Issue 7994004: Initial source commit to grit-i18n project.

Unified Diff: grit/pseudo_rtl.py

Issue 7994004: Initial source commit to grit-i18n project. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/

Patch Set: Created 9 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: grit/pseudo_rtl.py

===================================================================

--- grit/pseudo_rtl.py (revision 0)

+++ grit/pseudo_rtl.py (revision 0)

@@ -0,0 +1,102 @@

+#!/usr/bin/python

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+'''Pseudo RTL, (aka Fake Bidi) support. It simply wraps each word with

+Unicode RTL overrides.

+More info at https://sites.google.com/a/chromium.org/dev/Home/fake-bidi

+'''

+import re

+import types

+from grit import tclib

# Maps a plain ASCII character to an accented (or otherwise decorated)
# Unicode replacement character.
# NOTE(review): nothing visible in this module reads this table or TO_ACCENT;
# they look like they were carried over from grit/pseudo.py - confirm before
# relying on them here.
ACCENTED_STRINGS = {
  'a': u"\u00e5", 'e': u"\u00e9", 'i': u"\u00ee", 'o': u"\u00f6",
  'u': u"\u00fb", 'A': u"\u00c5", 'E': u"\u00c9", 'I': u"\u00ce",
  'O': u"\u00d6", 'U': u"\u00db", 'c': u"\u00e7", 'd': u"\u00f0",
  'n': u"\u00f1", 'p': u"\u00fe", 'y': u"\u00fd", 'C': u"\u00c7",
  'D': u"\u00d0", 'N': u"\u00d1", 'P': u"\u00de", 'Y': u"\u00dd",
  'f': u"\u0192", 's': u"\u0161", 'S': u"\u0160", 'z': u"\u017e",
  'Z': u"\u017d", 'g': u"\u011d", 'G': u"\u011c", 'h': u"\u0125",
  'H': u"\u0124", 'j': u"\u0135", 'J': u"\u0134", 'k': u"\u0137",
  'K': u"\u0136", 'l': u"\u013c", 'L': u"\u013b", 't': u"\u0163",
  'T': u"\u0162", 'w': u"\u0175", 'W': u"\u0174",
  '$': u"\u20ac", '?': u"\u00bf", 'R': u"\u00ae", '!': u"\u00a1",
}

# A pattern whose first alternative is a character set containing the keys in
# ACCENTED_STRINGS.
# We should not accent characters in an escape sequence such as "\n".
# To be safe, we assume every character following a backslash is an escaped
# character, so the second alternative consumes a backslash together with the
# lowercase letter (or backslash) that follows it. We also need to consider
# the case like "\\n", which means a backslash and a character "n": there the
# two backslashes are consumed as one escape and the "n" is matched on its own.
#
# The keys are passed through re.escape so that a key which is special inside
# a character class (']', '^', '-', '\') cannot corrupt the pattern.
TO_ACCENT = re.compile(
    r'[%s]|\\[a-z\\]' % re.escape(''.join(ACCENTED_STRINGS)))

# Lex text so that we don't interfere with html tokens and entities.
# This lexing scheme will handle all well formed tags and entities, html or
# xhtml. It will not handle comments, CDATA sections, or the unescaping tags:
# script, style, xmp or listing. If any of those appear in messages,
# something is wrong.
#
# The patterns are tried in order; the last one matches any single character
# (re.S makes '.' match a newline too), so non-empty input always yields a
# token.
TOKENS = [
  re.compile(
      '^%s' % pattern,  # match at the beginning of input
      re.I | re.S)  # html tokens are case-insensitive
  for pattern in (
      # a run of non html special characters
      r'[^<&]+',
      # a tag
      (r'</?[a-z]\w*'  # beginning of tag
       r'(?:\s+\w+(?:\s*=\s*'  # attribute start
       r'(?:[^\s"\'>]+|"[^\"]*"|\'[^\']*\'))?'  # attribute value
       r')*\s*/?>'),
      # an entity
      r'&(?:[a-z]\w+|#\d+|#x[\da-f]+);',
      # an html special character not part of a special sequence
      r'.',
  )
]

# A run of word characters that are neither digits nor underscore.
ALPHABETIC_RUN = re.compile(r'([^\W0-9_]+)')

# U+202E RIGHT-TO-LEFT OVERRIDE and U+202C POP DIRECTIONAL FORMATTING.
RLO = u'\u202e'
PDF = u'\u202c'


def PseudoRTLString(text):
  '''Returns a fake bidirectional version of the source string.

  The text is split into tokens using TOKENS. Tokens that start with '<' or
  '&' (tags, entities and stray html special characters) are copied through
  untouched; in every other token each alphabetic run is wrapped as
  RLO + run + PDF.

  Args:
    text: the string to convert. A false value (e.g. '' or None) gives ''.

  Return:
    The concatenation of the processed tokens.
  '''
  parts = []
  while text:
    for token in TOKENS:
      # Every pattern is anchored with '^', so match() is the direct way to
      # test it against the head of the remaining text.
      m = token.match(text)
      if not m:
        continue
      part = m.group(0)
      text = text[len(part):]
      if part[0] not in ('<', '&'):
        # not a tag or entity, so wrap each alphabetic run in an RTL override
        part = ALPHABETIC_RUN.sub(lambda run: RLO + run.group() + PDF, part)
      parts.append(part)
      # The first pattern that matches wins; restart on the remaining text.
      break
  return ''.join(parts)

def PseudoRTLMessage(message):
  '''Returns a pseudo-RTL (aka Fake-Bidi) translation of the provided message.

  Each item returned by message.GetContent() is copied into a new
  translation: tclib.Placeholder items are appended unchanged, and every
  other item is run through PseudoRTLString and appended as text.

  Args:
    message: tclib.Message()

  Return:
    tclib.Translation()
  '''
  transl = tclib.Translation()
  for part in message.GetContent():
    if isinstance(part, tclib.Placeholder):
      # Placeholders are passed through as-is.
      transl.AppendPlaceholder(part)
    else:
      transl.AppendText(PseudoRTLString(part))
  return transl

Property changes on: grit/pseudo_rtl.py

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « grit/pseudo.py ('k') | grit/pseudo_unittest.py » ('j') | no next file with comments »