grit/util.py - Issue 7994004: Initial source commit to grit-i18n project.

Unified Diff: grit/util.py

Issue 7994004: Initial source commit to grit-i18n project. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/

Patch Set: Created 9 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: grit/util.py

===================================================================

--- grit/util.py (revision 0)

+++ grit/util.py (revision 0)

@@ -0,0 +1,310 @@

+#!/usr/bin/python2.4

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+'''Utilities used by GRIT.

+'''

+import sys

+import os.path

+import codecs

+import htmlentitydefs

+import re

+import time

+from xml.sax import saxutils

# Root directory of GRIT: the parent of the directory containing this module.
# Whether it is absolute or relative follows from how __file__ was set when
# the module was loaded.
_root_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))

# Matches the resource IDs predefined by Windows.
# The '\b' before and after each word makes sure these match only whole words
# and not the beginning of a longer word, e.g. ID_FILE_NEW will not match
# ID_FILE_NEW_PROJECT.
# See http://www.amk.ca/python/howto/regex/ (search for "\bclass\b" inside the
# HTML page).
# NOTE(review): the opening of this pattern literal was missing from the
# reviewed text and has been restored here - confirm that no identifiers
# preceding ID_FILE_NEW were lost.
SYSTEM_IDENTIFIERS = re.compile(
  r'''\bID_FILE_NEW\b | \bID_FILE_OPEN\b | \bID_FILE_CLOSE\b | \bID_FILE_SAVE\b |
      \bID_FILE_SAVE_AS\b | \bID_FILE_PAGE_SETUP\b | \bID_FILE_PRINT_SETUP\b |
      \bID_FILE_PRINT\b | \bID_FILE_PRINT_DIRECT\b | \bID_FILE_PRINT_PREVIEW\b |
      \bID_FILE_UPDATE\b | \bID_FILE_SAVE_COPY_AS\b | \bID_FILE_SEND_MAIL\b |
      \bID_FILE_MRU_FIRST\b | \bID_FILE_MRU_LAST\b |
      \bID_EDIT_CLEAR\b | \bID_EDIT_CLEAR_ALL\b | \bID_EDIT_COPY\b |
      \bID_EDIT_CUT\b | \bID_EDIT_FIND\b | \bID_EDIT_PASTE\b | \bID_EDIT_PASTE_LINK\b |
      \bID_EDIT_PASTE_SPECIAL\b | \bID_EDIT_REPEAT\b | \bID_EDIT_REPLACE\b |
      \bID_EDIT_SELECT_ALL\b | \bID_EDIT_UNDO\b | \bID_EDIT_REDO\b |
      \bVS_VERSION_INFO\b | \bIDRETRY\b''', re.VERBOSE)

# Matches character entities, whether specified by name, decimal or hex.
# Exactly one of the named groups 'decimal', 'hex' and 'named' is set on a
# match; the other two are None.
_HTML_ENTITY = re.compile(
  '&(#(?P<decimal>[0-9]+)|#x(?P<hex>[a-fA-F0-9]+)|(?P<named>[a-z0-9]+));',
  re.IGNORECASE)

# Matches characters that should be HTML-escaped. This is ", <, > and &, but
# only if the & is not the start of an HTML character entity.
# NOTE(review): inside the lookahead the trailing ';' binds only to the last
# alternative, so '&#123' and '&#x7b' count as entities even without a
# terminating semicolon. Left as is because callers may rely on it - confirm
# whether that leniency is intended.
_HTML_CHARS_TO_ESCAPE = re.compile('"|<|>|&(?!#[0-9]+|#x[0-9a-z]+|[a-z]+;)',
                                   re.IGNORECASE | re.MULTILINE)

def WrapInputStream(stream, encoding='utf-8'):
  '''Returns a stream that wraps the provided stream, making it read characters
  using the specified encoding.

  Args:
    stream: a file-like object that yields encoded bytes
    encoding: name of a codec known to the 'codecs' module, e.g. 'utf-8'

  Return:
    A codecs StreamReader for 'encoding' wrapped around 'stream'.

  Raises:
    LookupError: if 'encoding' is not a known codec.
  '''
  return codecs.getreader(encoding)(stream)

def WrapOutputStream(stream, encoding='utf-8'):
  '''Returns a stream that wraps the provided stream, making it write
  characters using the specified encoding.

  Args:
    stream: a file-like object that accepts encoded bytes
    encoding: name of a codec known to the 'codecs' module, e.g. 'utf-8'

  Return:
    A codecs StreamWriter for 'encoding' wrapped around 'stream'.

  Raises:
    LookupError: if 'encoding' is not a known codec.
  '''
  return codecs.getwriter(encoding)(stream)

def ChangeStdoutEncoding(encoding='utf-8'):
  '''Changes STDOUT to print characters using the specified encoding.

  Rebinds sys.stdout to an encoding writer wrapped around the current
  sys.stdout; calling this repeatedly stacks wrappers on top of each other.

  Args:
    encoding: name of a codec known to the 'codecs' module, e.g. 'utf-8'
  '''
  sys.stdout = WrapOutputStream(sys.stdout, encoding)

def EscapeHtml(text, escape_quotes=False):
  '''Returns 'text' with <, > and & (and optionally ") escaped to named HTML
  entities.  Any existing named entity or HTML entity defined by decimal or
  hex code will be left untouched.  This is appropriate for escaping text for
  inclusion in HTML, but not for XML.

  Args:
    text: the string to escape
    escape_quotes: if True, double quotes are replaced by &quot; as well

  Return:
    The escaped string.
  '''
  replacements = {'&': '&amp;', '<': '&lt;', '>': '&gt;'}
  if escape_quotes:
    replacements['"'] = '&quot;'

  def Replace(match):
    char = match.group()
    # The pattern matches double quotes even when they are not being escaped;
    # in that case there is no table entry and the quote is emitted unchanged.
    return replacements.get(char, char)

  return _HTML_CHARS_TO_ESCAPE.sub(Replace, text)

def UnescapeHtml(text, replace_nbsp=True):
  '''Returns 'text' with all HTML character entities (both named character
  entities and those specified by decimal or hexadecimal Unicode ordinal)
  replaced by their Unicode characters.

  The only exception is that &nbsp; will not be replaced if 'replace_nbsp' is
  False.  Entities with an unknown name, or with a numeric value that cannot
  be represented as a character, are left untouched.

  Args:
    text: the string to unescape
    replace_nbsp: whether &nbsp; should be replaced like any other entity

  Return:
    The unescaped string.
  '''
  def ToChar(match, codepoint):
    # unichr() rejects ordinals outside the supported range; keep the original
    # entity text rather than failing on such input.
    try:
      return unichr(codepoint)
    except (ValueError, OverflowError):
      return match.group()

  def Replace(match):
    groups = match.groupdict()
    if groups['hex']:
      return ToChar(match, int(groups['hex'], 16))
    if groups['decimal']:
      return ToChar(match, int(groups['decimal'], 10))

    name = groups['named']
    if name == 'nbsp' and not replace_nbsp:
      return match.group()  # Don't replace &nbsp;
    codepoint = htmlentitydefs.name2codepoint.get(name)
    if codepoint is None:
      return match.group()  # Unknown HTML character entity - don't replace
    return unichr(codepoint)

  return _HTML_ENTITY.sub(Replace, text)

def EncodeCdata(cdata):
  '''Returns the provided cdata in either escaped format or <![CDATA[xxx]]>
  format, depending on which is more appropriate for easy editing.  The data
  is escaped for inclusion in an XML element's body.

  The CDATA form is used when the text contains more than one '<' or more than
  one '>'.  It is never used when the text contains ']]>', because that
  sequence would terminate the CDATA section early.

  Args:
    cdata: 'If x < y and y < z then x < z'

  Return:
    '<![CDATA[If x < y and y < z then x < z]]>'
  '''
  many_brackets = cdata.count('<') > 1 or cdata.count('>') > 1
  # Parenthesised on purpose: 'and' binds tighter than 'or', so without the
  # grouping the ']]>' check would only guard the '>' count.
  if many_brackets and ']]>' not in cdata:
    return '<![CDATA[%s]]>' % cdata
  return saxutils.escape(cdata)

def FixupNamedParam(function, param_name, param_value):
  '''Returns a closure that is identical to 'function' but ensures that the
  named parameter 'param_name' is always set to 'param_value' unless explicitly
  set by the caller.

  Only keyword arguments are inspected: if the caller passes the parameter
  positionally, 'param_value' is still added as a keyword argument.

  Args:
    function: callable
    param_name: 'bingo'
    param_value: 'bongo' (any type)

  Return:
    callable
  '''
  def FixupClosure(*args, **kw):
    kw.setdefault(param_name, param_value)
    return function(*args, **kw)
  return FixupClosure

def PathFromRoot(path):
  r'''Takes a path relative to the root directory for GRIT (the one that grit.py
  resides in) and returns a path that is either absolute or relative to the
  current working directory (i.e. a path you can use to open the file).

  Args:
    path: 'rel_dir\file.ext'

  Return:
    'c:\src\tools\rel_dir\file.ext'
  '''
  # The docstring is a raw string so the backslashes in the example paths are
  # not interpreted as escape sequences.
  return os.path.normpath(os.path.join(_root_dir, path))

def FixRootForUnittest(root_node, dir=PathFromRoot('.')):
  '''Adds GetBaseDir() and GetSourceLanguage() methods to 'root_node', making
  unittesting easier.

  Nothing is added if 'root_node' already has a GetBaseDir attribute.

  Args:
    root_node: the object to patch
    dir: the value the added GetBaseDir() returns; the default is computed
         once, when this module is loaded
  '''
  def GetBaseDir():
    '''Returns a fake base directory.'''
    return dir

  def GetSourceLanguage():
    '''Returns a fake source language.'''
    return 'en'

  # NOTE(review): indentation was lost in the reviewed text; both assignments
  # are placed under the check here - confirm GetSourceLanguage is not meant
  # to be set unconditionally.
  if not hasattr(root_node, 'GetBaseDir'):
    root_node.GetBaseDir = GetBaseDir
    root_node.GetSourceLanguage = GetSourceLanguage

def dirname(filename):
  '''Version of os.path.dirname() that never returns empty paths (returns
  '.' if the result of os.path.dirname() is empty).

  Args:
    filename: 'file.ext'

  Return:
    '.'
  '''
  return os.path.dirname(filename) or '.'

def normpath(path):
  '''Version of os.path.normpath that also changes backward slashes to
  forward slashes when not running on Windows.

  Args:
    path: 'a\\b/../c'

  Return:
    'a/c' (with the platform's separator)
  '''
  # This is safe to always do because the Windows version of os.path.normpath
  # will replace forward slashes with backward slashes.
  return os.path.normpath(path.replace('\\', '/'))

# Separators accepted between the parts of a language code.
_LANGUAGE_SPLIT_RE = re.compile('-|_|/')


def CanonicalLanguage(code):
  '''Canonicalizes multi-part language codes by joining the parts with dashes
  and making every part after the first upper case.  Returns one-part language
  codes unchanged.

  Args:
    code: 'zh_cn'

  Return:
    code: 'zh-CN'
  '''
  parts = _LANGUAGE_SPLIT_RE.split(code)
  # The first part is kept as given; only the following parts are upper-cased.
  return '-'.join(parts[:1] + [part.upper() for part in parts[1:]])

# Maps canonical language codes to Windows codepage numbers.
_LANG_TO_CODEPAGE = {
  'en' : 1252,
  'fr' : 1252,
  'it' : 1252,
  'de' : 1252,
  'es' : 1252,
  'nl' : 1252,
  'sv' : 1252,
  'no' : 1252,
  'da' : 1252,
  'fi' : 1252,
  'pt-BR' : 1252,
  'ru' : 1251,
  'ja' : 932,
  'zh-TW' : 950,
  'zh-CN' : 936,
  'ko' : 949,
}

def LanguageToCodepage(lang):
  '''Returns the codepage _number_ that can be used to represent 'lang', which
  may be either in formats such as 'en', 'pt_br', 'pt-BR', etc.

  The codepage returned will be one of the 'cpXXXX' codepage numbers.  For a
  language with no known codepage a message is printed and 1252 is returned.

  Args:
    lang: 'de'

  Return:
    1252
  '''
  lang = CanonicalLanguage(lang)
  try:
    return _LANG_TO_CODEPAGE[lang]
  except KeyError:
    # A single parenthesised argument keeps this line valid both as a print
    # statement and as a print() call.
    print("Not sure which codepage to use for %s, assuming cp1252" % lang)
    return 1252

def NewClassInstance(class_name, class_type):
  '''Returns an instance of the class specified in class_name.

  The class is instantiated with no arguments.

  Args:
    class_name: the fully qualified, dot separated package + classname,
      i.e. "my.package.name.MyClass". Short class names are not supported.
    class_type: the class or superclass this object must implement

  Return:
    An instance of the class, or None if the name has no module part, the
    module has no such attribute, or the instance is not a 'class_type'.

  Raises:
    ImportError: if the module part of 'class_name' cannot be imported.
  '''
  lastdot = class_name.rfind('.')
  if lastdot <= 0:
    # No dot at all, or nothing in front of it: there is no module to import.
    return None
  module_name = class_name[:lastdot]
  short_name = class_name[lastdot + 1:]

  # The non-empty fromlist makes __import__ return the innermost module
  # ('a.b.c') rather than the top-level package ('a').
  module = __import__(module_name, globals(), locals(), [''])
  class_ = getattr(module, short_name, None)
  if class_ is None:
    return None

  class_instance = class_()
  if isinstance(class_instance, class_type):
    return class_instance
  return None

def FixLineEnd(text, line_end):
  '''Returns 'text' with every line ending replaced by 'line_end'.

  Args:
    text: 'a\\r\\nb\\rc\\n'
    line_end: '\\n'

  Return:
    'a\\nb\\nc\\n'
  '''
  # First normalize all line-ending styles to '\n'.  '\r\n' has to be handled
  # before the lone '\r', otherwise it would turn into two line ends.
  normalized = text.replace('\r\n', '\n').replace('\r', '\n')
  # Then convert to the requested line end.
  return normalized.replace('\n', line_end)

def BoolToString(bool):
  '''Returns 'true' if 'bool' is truthy and 'false' otherwise.

  Args:
    bool: any value; only its truthiness is used.  (The name shadows the
      builtin inside this function but is kept so keyword callers still work.)

  Return:
    'true' or 'false'
  '''
  if bool:
    return 'true'
  return 'false'

# Module-level verbosity flags, both off by default.  Nothing in this part of
# the module changes them - presumably callers assign to them directly.
verbose = False
extra_verbose = False


def IsVerbose():
  '''Returns the current value of the module-level 'verbose' flag.'''
  return verbose


def IsExtraVerbose():
  '''Returns the current value of the module-level 'extra_verbose' flag.'''
  return extra_verbose

def GetCurrentYear():
  '''Returns the current 4-digit year, in local time, as an integer.'''
  return time.localtime().tm_year

Property changes on: grit/util.py

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « grit/tool/unit.py ('k') | grit/util_unittest.py » ('j') | no next file with comments »