| Index: grit/util.py
|
| ===================================================================
|
| --- grit/util.py (revision 0)
|
| +++ grit/util.py (revision 0)
|
| @@ -0,0 +1,310 @@
|
| +#!/usr/bin/python2.4
|
| +# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +'''Utilities used by GRIT.
|
| +'''
|
| +
|
| +import sys
|
| +import os.path
|
| +import codecs
|
| +import htmlentitydefs
|
| +import re
|
| +import time
|
| +from xml.sax import saxutils
|
| +
|
| +# Normalized path of the parent of the directory that contains this module.
|
| +# PathFromRoot() joins caller-supplied relative paths onto this directory.
|
| +_root_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))
|
| +
|
| +
|
| +# Matches all of the resource IDs predefined by Windows.
|
| +# The '\b' before and after each word makes sure these match only whole words
|
| +# and not the beginning of a longer word, e.g. ID_FILE_NEW will not match
|
| +# inside ID_FILE_NEW_PROJECT.
|
| +# See http://www.amk.ca/python/howto/regex/ (search for "\bclass\b" inside the
|
| +# html page).
|
| +SYSTEM_IDENTIFIERS = re.compile(
|
| +  r'''\bIDOK\b | \bIDCANCEL\b | \bIDC_STATIC\b | \bIDYES\b | \bIDNO\b |
|
| +      \bID_FILE_NEW\b | \bID_FILE_OPEN\b | \bID_FILE_CLOSE\b | \bID_FILE_SAVE\b |
|
| +      \bID_FILE_SAVE_AS\b | \bID_FILE_PAGE_SETUP\b | \bID_FILE_PRINT_SETUP\b |
|
| +      \bID_FILE_PRINT\b | \bID_FILE_PRINT_DIRECT\b | \bID_FILE_PRINT_PREVIEW\b |
|
| +      \bID_FILE_UPDATE\b | \bID_FILE_SAVE_COPY_AS\b | \bID_FILE_SEND_MAIL\b |
|
| +      \bID_FILE_MRU_FIRST\b | \bID_FILE_MRU_LAST\b |
|
| +      \bID_EDIT_CLEAR\b | \bID_EDIT_CLEAR_ALL\b | \bID_EDIT_COPY\b |
|
| +      \bID_EDIT_CUT\b | \bID_EDIT_FIND\b | \bID_EDIT_PASTE\b | \bID_EDIT_PASTE_LINK\b |
|
| +      \bID_EDIT_PASTE_SPECIAL\b | \bID_EDIT_REPEAT\b | \bID_EDIT_REPLACE\b |
|
| +      \bID_EDIT_SELECT_ALL\b | \bID_EDIT_UNDO\b | \bID_EDIT_REDO\b |
|
| +      \bVS_VERSION_INFO\b | \bIDRETRY\b''', re.VERBOSE)
|
| +
|
| +
|
| +# Matches one complete HTML character entity ('&' up to and including ';'),
|
| +# ignoring case. Each alternative captures into its own named group: 'decimal'
|
| +# for the &#NNN; form, 'hex' for the &#xHHH; form and 'named' for the &name;
|
| +# form.
|
| +_HTML_ENTITY = re.compile(
|
| + '&(#(?P<decimal>[0-9]+)|#x(?P<hex>[a-fA-F0-9]+)|(?P<named>[a-z0-9]+));',
|
| + re.IGNORECASE)
|
| +
|
| +# Matches characters that should be HTML-escaped. This is ", <, > and &, but
|
| +# only if the & is not the start of an HTML character entity. An entity is
|
| +# recognized in decimal (&#NNN;), hex (&#xHHH;) or named (&name;) form; the
|
| +# terminating ';' is required for all three forms, and entity names may
|
| +# contain digits (e.g. &frac12;).
|
| +_HTML_CHARS_TO_ESCAPE = re.compile(
|
| +  '"|<|>|&(?!(?:#[0-9]+|#x[0-9a-f]+|[a-z0-9]+);)',
|
| +  re.IGNORECASE | re.MULTILINE)
|
| +
|
| +
|
| +def WrapInputStream(stream, encoding = 'utf-8'):
|
| +  '''Wraps 'stream' in the stream reader registered for 'encoding', so that
|
| +  reads from the returned object yield characters decoded with that
|
| +  encoding.'''
|
| +  reader_factory = codecs.getreader(encoding)
|
| +  return reader_factory(stream)
|
| +
|
| +
|
| +def WrapOutputStream(stream, encoding = 'utf-8'):
|
| +  '''Wraps 'stream' in the stream writer registered for 'encoding', so that
|
| +  characters written to the returned object are encoded with that encoding
|
| +  before reaching 'stream'.'''
|
| +  writer_factory = codecs.getwriter(encoding)
|
| +  return writer_factory(stream)
|
| +
|
| +
|
| +def ChangeStdoutEncoding(encoding = 'utf-8'):
|
| +  '''Rebinds sys.stdout to a wrapper around the current sys.stdout that
|
| +  writes characters using the specified encoding.'''
|
| +  encoded_stdout = WrapOutputStream(sys.stdout, encoding)
|
| +  sys.stdout = encoded_stdout
|
| +
|
| +
|
| +def EscapeHtml(text, escape_quotes = False):
|
| +  '''Returns 'text' with <, > and & (and optionally ") escaped to named HTML
|
| +  entities. Any existing named entity or HTML entity defined by decimal or
|
| +  hex code will be left untouched. This is appropriate for escaping text for
|
| +  inclusion in HTML, but not for XML.
|
| +
|
| +  Args:
|
| +    text: 'a < b & "c"'
|
| +    escape_quotes: if True, '"' is replaced by '&quot;' as well
|
| +
|
| +  Return:
|
| +    'a &lt; b &amp; "c"'
|
| +  '''
|
| +  entities = {'&': '&amp;', '<': '&lt;', '>': '&gt;'}
|
| +  if escape_quotes:
|
| +    entities['"'] = '&quot;'
|
| +
|
| +  def Replace(match):
|
| +    char = match.group()
|
| +    # _HTML_CHARS_TO_ESCAPE always matches '"'; when quotes are not being
|
| +    # escaped the character has no table entry and is returned unchanged.
|
| +    return entities.get(char, char)
|
| +
|
| +  return _HTML_CHARS_TO_ESCAPE.sub(Replace, text)
|
| +
|
| +
|
| +def UnescapeHtml(text, replace_nbsp=True):
|
| +  '''Returns 'text' with all HTML character entities (both named character
|
| +  entities and those specified by decimal or hexadecimal Unicode ordinal)
|
| +  replaced by the corresponding Unicode characters.
|
| +
|
| +  The only exception is that &nbsp; will not be unescaped if 'replace_nbsp'
|
| +  is False. Named entities that are not listed in
|
| +  htmlentitydefs.name2codepoint are left untouched.
|
| +
|
| +  Args:
|
| +    text: 'caf&eacute; &#169; &#xA9;'
|
| +    replace_nbsp: whether &nbsp; should be replaced as well
|
| +
|
| +  Return:
|
| +    'text' with the recognized entities replaced
|
| +  '''
|
| +  def Replace(match):
|
| +    groups = match.groupdict()
|
| +    if groups['hex']:
|
| +      return unichr(int(groups['hex'], 16))
|
| +    if groups['decimal']:
|
| +      return unichr(int(groups['decimal'], 10))
|
| +
|
| +    # Neither numeric group matched, so the regexp guarantees a named entity.
|
| +    name = groups['named']
|
| +    assert name is not None
|
| +    if name == 'nbsp' and not replace_nbsp:
|
| +      return match.group()  # Don't replace
|
| +    # Test membership on the dict itself; .keys() would build a list and
|
| +    # scan it linearly for every entity found.
|
| +    if name in htmlentitydefs.name2codepoint:
|
| +      return unichr(htmlentitydefs.name2codepoint[name])
|
| +    return match.group()  # Unknown HTML character entity - don't replace
|
| +
|
| +  return _HTML_ENTITY.sub(Replace, text)
|
| +
|
| +
|
| +def EncodeCdata(cdata):
|
| +  '''Returns the provided cdata in either escaped format or <![CDATA[xxx]]>
|
| +  format, depending on which is more appropriate for easy editing. The data
|
| +  is escaped for inclusion in an XML element's body.
|
| +
|
| +  The CDATA form is used when the text contains more than one '<' or more
|
| +  than one '>', but never when it contains ']]>', because that sequence
|
| +  would terminate the CDATA section early.
|
| +
|
| +  Args:
|
| +    cdata: 'If x < y and y < z then x < z'
|
| +
|
| +  Return:
|
| +    '<![CDATA[If x < y and y < z then x < z]]>'
|
| +  '''
|
| +  many_brackets = cdata.count('<') > 1 or cdata.count('>') > 1
|
| +  # 'and' binds tighter than 'or', so the ']]>' guard has to be applied to the
|
| +  # combined bracket test rather than to the '>' count alone.
|
| +  if many_brackets and cdata.count(']]>') == 0:
|
| +    return '<![CDATA[%s]]>' % cdata
|
| +  return saxutils.escape(cdata)
|
| +
|
| +
|
| +def FixupNamedParam(function, param_name, param_value):
|
| +  '''Builds a wrapper around 'function' that supplies 'param_value' for the
|
| +  keyword argument 'param_name' whenever the caller does not pass that
|
| +  keyword explicitly. All other arguments are forwarded unchanged.
|
| +
|
| +  Args:
|
| +    function: callable
|
| +    param_name: 'bingo'
|
| +    param_value: 'bongo' (any type)
|
| +
|
| +  Return:
|
| +    callable
|
| +  '''
|
| +  def FixupClosure(*args, **kw):
|
| +    # Only fills the value in when the caller left the keyword out.
|
| +    kw.setdefault(param_name, param_value)
|
| +    return function(*args, **kw)
|
| +  return FixupClosure
|
| +
|
| +
|
| +def PathFromRoot(path):
|
| +  r'''Resolves a path given relative to the root directory for GRIT (the one
|
| +  that grit.py resides in) into a normalized path that is either absolute or
|
| +  relative to the current working directory (i.e. a path you can use to open
|
| +  the file).
|
| +
|
| +  Args:
|
| +    path: 'rel_dir\file.ext'
|
| +
|
| +  Return:
|
| +    'c:\src\tools\rel_dir\file.ext'
|
| +  '''
|
| +  joined = os.path.join(_root_dir, path)
|
| +  return os.path.normpath(joined)
|
| +
|
| +
|
| +def FixRootForUnittest(root_node, dir=PathFromRoot('.')):
|
| + '''Adds GetBaseDir() and GetSourceLanguage() methods to 'root_node', making
|
| + unittesting easier.
|
| +
|
| + Args:
|
| + root_node: object to patch
|
| + dir: value the injected GetBaseDir() returns; the default is computed once,
|
| + when this function is defined
|
| + '''
|
| + def GetBaseDir():
|
| + '''Returns a fake base directory.'''
|
| + return dir
|
| + def GetSourceLanguage():
|
| + '''Returns a fixed source language code.'''
|
| + return 'en'
|
| + # NOTE(review): nesting is not visible in this rendering; presumably both
|
| + # setattr calls below are guarded by the hasattr check, so a node that
|
| + # already has GetBaseDir is left alone - confirm against the real file.
|
| + if not hasattr(root_node, 'GetBaseDir'):
|
| + setattr(root_node, 'GetBaseDir', GetBaseDir)
|
| + setattr(root_node, 'GetSourceLanguage', GetSourceLanguage)
|
| +
|
| +
|
| +def dirname(filename):
|
| +  '''Like os.path.dirname(), except that an empty result is reported as '.'
|
| +  so that callers always get a usable directory path.
|
| +  '''
|
| +  parent = os.path.dirname(filename)
|
| +  if parent != '':
|
| +    return parent
|
| +  return '.'
|
| +
|
| +
|
| +def normpath(path):
|
| +  '''Like os.path.normpath(), but backward slashes in 'path' are first turned
|
| +  into forward slashes, so that Windows-style paths also normalize when not
|
| +  running on Windows.
|
| +  '''
|
| +  # Converting unconditionally is safe: on Windows, os.path.normpath turns
|
| +  # the forward slashes back into backward slashes.
|
| +  return os.path.normpath(path.replace('\\', '/'))
|
| +
|
| +
|
| +_LANGUAGE_SPLIT_RE = re.compile('-|_|/')
|
| +
|
| +
|
| +def CanonicalLanguage(code):
|
| +  '''Canonicalizes a language code: the parts separated by '-', '_' or '/'
|
| +  are joined with dashes and every part after the first is made upper case.
|
| +  One-part language codes are returned unchanged.
|
| +
|
| +  Args:
|
| +    code: 'zh_cn'
|
| +
|
| +  Return:
|
| +    code: 'zh-CN'
|
| +  '''
|
| +  pieces = _LANGUAGE_SPLIT_RE.split(code)
|
| +  canonical = pieces[:1] + [piece.upper() for piece in pieces[1:]]
|
| +  return '-'.join(canonical)
|
| +
|
| +
|
| +# Maps language codes, in the form produced by CanonicalLanguage(), to the
|
| +# codepage number used for that language. LanguageToCodepage() falls back to
|
| +# 1252 for any language that is not listed here.
|
| +_LANG_TO_CODEPAGE = {
|
| + 'en' : 1252,
|
| + 'fr' : 1252,
|
| + 'it' : 1252,
|
| + 'de' : 1252,
|
| + 'es' : 1252,
|
| + 'nl' : 1252,
|
| + 'sv' : 1252,
|
| + 'no' : 1252,
|
| + 'da' : 1252,
|
| + 'fi' : 1252,
|
| + 'pt-BR' : 1252,
|
| + 'ru' : 1251,
|
| + 'ja' : 932,
|
| + 'zh-TW' : 950,
|
| + 'zh-CN' : 936,
|
| + 'ko' : 949,
|
| +}
|
| +
|
| +
|
| +def LanguageToCodepage(lang):
|
| +  '''Looks up the codepage _number_ that can be used to represent 'lang',
|
| +  which may be given in formats such as 'en', 'pt_br', 'pt-BR', etc.
|
| +
|
| +  The codepage returned will be one of the 'cpXXXX' codepage numbers. For a
|
| +  language that is not in the table a notice is printed and 1252 is returned.
|
| +
|
| +  Args:
|
| +    lang: 'de'
|
| +
|
| +  Return:
|
| +    1252
|
| +  '''
|
| +  lang = CanonicalLanguage(lang)
|
| +  codepage = _LANG_TO_CODEPAGE.get(lang)
|
| +  if codepage is not None:
|
| +    return codepage
|
| +  print("Not sure which codepage to use for %s, assuming cp1252" % lang)
|
| +  return 1252
|
| +
|
| +def NewClassInstance(class_name, class_type):
|
| +  '''Imports the module named in 'class_name', instantiates the named class
|
| +  with no arguments and returns the instance if it is of type 'class_type'.
|
| +
|
| +  Args:
|
| +    class_name: the fully qualified, dot separated package + classname,
|
| +    i.e. "my.package.name.MyClass". Short class names are not supported.
|
| +    class_type: the class or superclass this object must implement
|
| +
|
| +  Return:
|
| +    An instance of the class, or None if none was found
|
| +  '''
|
| +  dot_pos = class_name.rfind('.')
|
| +  # A negative position means there is no module part at all; position 0
|
| +  # means the module part is empty. Neither can be imported.
|
| +  if dot_pos <= 0:
|
| +    return None
|
| +
|
| +  module_name = class_name[0:dot_pos]
|
| +  short_name = class_name[dot_pos+1:]
|
| +  module = __import__(module_name, globals(), locals(), [''])
|
| +  if not hasattr(module, short_name):
|
| +    return None
|
| +
|
| +  instance = getattr(module, short_name)()
|
| +  if isinstance(instance, class_type):
|
| +    return instance
|
| +  return None
|
| +
|
| +
|
| +def FixLineEnd(text, line_end):
|
| +  '''Returns 'text' with every line ending ('\\r\\n', '\\r' or '\\n')
|
| +  replaced by 'line_end'.
|
| +  '''
|
| +  # Collapse all line ending styles to '\n' first, so that a '\r\n' pair is
|
| +  # treated as a single line ending.
|
| +  normalized = text.replace('\r\n', '\n').replace('\r', '\n')
|
| +  return normalized.replace('\n', line_end)
|
| +
|
| +
|
| +def BoolToString(bool):
|
| +  '''Returns 'true' if the argument is truthy and 'false' otherwise.'''
|
| +  if not bool:
|
| +    return 'false'
|
| +  return 'true'
|
| +
|
| +
|
| +# Module-level verbosity flags, both off by default. Nothing in this file
|
| +# assigns to them; presumably the command-line front end sets them - confirm.
|
| +verbose = False
|
| +extra_verbose = False
|
| +
|
| +def IsVerbose():
|
| + '''Returns the current value of the module-level 'verbose' flag.'''
|
| + return verbose
|
| +
|
| +def IsExtraVerbose():
|
| + '''Returns the current value of the module-level 'extra_verbose' flag.'''
|
| + return extra_verbose
|
| +
|
| +def GetCurrentYear():
|
| +  '''Returns the current year in local time, as a 4-digit integer.'''
|
| +  now = time.localtime()
|
| +  return now.tm_year
|
| +
|
|
|
| Property changes on: grit/util.py
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|