| Index: grit/util.py
 | 
| ===================================================================
 | 
| --- grit/util.py	(revision 0)
 | 
| +++ grit/util.py	(revision 0)
 | 
| @@ -0,0 +1,310 @@
 | 
| +#!/usr/bin/python2.4
 | 
| +# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
 | 
| +# Use of this source code is governed by a BSD-style license that can be
 | 
| +# found in the LICENSE file.
 | 
| +
 | 
| +'''Utilities used by GRIT.
 | 
| +'''
 | 
| +
 | 
| +import sys
 | 
| +import os.path
 | 
| +import codecs
 | 
| +import htmlentitydefs
 | 
| +import re
 | 
| +import time
 | 
| +from xml.sax import saxutils
 | 
| +
 | 
| +_root_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))
 | 
| +
 | 
| +
 | 
| +# Matches all of the resource IDs predefined by Windows.
 | 
| +# The '\b' before and after each word makes sure these match only whole words and
 | 
| +# not the beginning of any word.. eg. ID_FILE_NEW will not match ID_FILE_NEW_PROJECT
 | 
| +# see http://www.amk.ca/python/howto/regex/ (search for "\bclass\b" inside the html page)
 | 
| +SYSTEM_IDENTIFIERS = re.compile(
 | 
| +  r'''\bIDOK\b | \bIDCANCEL\b | \bIDC_STATIC\b | \bIDYES\b | \bIDNO\b |
 | 
| +      \bID_FILE_NEW\b | \bID_FILE_OPEN\b | \bID_FILE_CLOSE\b | \bID_FILE_SAVE\b |
 | 
| +      \bID_FILE_SAVE_AS\b | \bID_FILE_PAGE_SETUP\b | \bID_FILE_PRINT_SETUP\b |
 | 
| +      \bID_FILE_PRINT\b | \bID_FILE_PRINT_DIRECT\b | \bID_FILE_PRINT_PREVIEW\b |
 | 
| +      \bID_FILE_UPDATE\b | \bID_FILE_SAVE_COPY_AS\b | \bID_FILE_SEND_MAIL\b |
 | 
| +      \bID_FILE_MRU_FIRST\b | \bID_FILE_MRU_LAST\b |
 | 
| +      \bID_EDIT_CLEAR\b | \bID_EDIT_CLEAR_ALL\b | \bID_EDIT_COPY\b |
 | 
| +      \bID_EDIT_CUT\b | \bID_EDIT_FIND\b | \bID_EDIT_PASTE\b | \bID_EDIT_PASTE_LINK\b |
 | 
| +      \bID_EDIT_PASTE_SPECIAL\b | \bID_EDIT_REPEAT\b | \bID_EDIT_REPLACE\b |
 | 
| +      \bID_EDIT_SELECT_ALL\b | \bID_EDIT_UNDO\b | \bID_EDIT_REDO\b |
 | 
| +      \bVS_VERSION_INFO\b | \bIDRETRY''', re.VERBOSE);
 | 
| +
 | 
| +
 | 
| +# Matches character entities, whether specified by name, decimal or hex.
 | 
| +_HTML_ENTITY = re.compile(
 | 
| +  '&(#(?P<decimal>[0-9]+)|#x(?P<hex>[a-fA-F0-9]+)|(?P<named>[a-z0-9]+));',
 | 
| +  re.IGNORECASE)
 | 
| +
 | 
| +# Matches characters that should be HTML-escaped.  This is <, > and &, but only
 | 
| +# if the & is not the start of an HTML character entity.
 | 
| +_HTML_CHARS_TO_ESCAPE = re.compile('"|<|>|&(?!#[0-9]+|#x[0-9a-z]+|[a-z]+;)',
 | 
| +                                   re.IGNORECASE | re.MULTILINE)
 | 
| +
 | 
| +
 | 
| +def WrapInputStream(stream, encoding = 'utf-8'):
 | 
| +  '''Returns a stream that wraps the provided stream, making it read characters
 | 
| +  using the specified encoding.'''
 | 
| +  (e, d, sr, sw) = codecs.lookup(encoding)
 | 
| +  return sr(stream)
 | 
| +
 | 
| +
 | 
| +def WrapOutputStream(stream, encoding = 'utf-8'):
 | 
| +  '''Returns a stream that wraps the provided stream, making it write
 | 
| +  characters using the specified encoding.'''
 | 
| +  (e, d, sr, sw) = codecs.lookup(encoding)
 | 
| +  return sw(stream)
 | 
| +
 | 
| +
 | 
| +def ChangeStdoutEncoding(encoding = 'utf-8'):
 | 
| +  '''Changes STDOUT to print characters using the specified encoding.'''
 | 
| +  sys.stdout = WrapOutputStream(sys.stdout, encoding)
 | 
| +
 | 
| +
 | 
| +def EscapeHtml(text, escape_quotes = False):
 | 
| +  '''Returns 'text' with <, > and & (and optionally ") escaped to named HTML
 | 
| +  entities.  Any existing named entity or HTML entity defined by decimal or
 | 
| +  hex code will be left untouched.  This is appropriate for escaping text for
 | 
| +  inclusion in HTML, but not for XML.
 | 
| +  '''
 | 
| +  def Replace(match):
 | 
| +    if match.group() == '&': return '&'
 | 
| +    elif match.group() == '<': return '<'
 | 
| +    elif match.group() == '>': return '>'
 | 
| +    elif match.group() == '"':
 | 
| +      if escape_quotes: return '"'
 | 
| +      else: return match.group()
 | 
| +    else: assert False
 | 
| +  out = _HTML_CHARS_TO_ESCAPE.sub(Replace, text)
 | 
| +  return out
 | 
| +
 | 
| +
 | 
| +def UnescapeHtml(text, replace_nbsp=True):
 | 
| +  '''Returns 'text' with all HTML character entities (both named character
 | 
| +  entities and those specified by decimal or hexadecimal Unicode ordinal)
 | 
| +  replaced by their Unicode characters (or latin1 characters if possible).
 | 
| +
 | 
| +  The only exception is that   will not be escaped if 'replace_nbsp' is
 | 
| +  False.
 | 
| +  '''
 | 
| +  def Replace(match):
 | 
| +    groups = match.groupdict()
 | 
| +    if groups['hex']:
 | 
| +      return unichr(int(groups['hex'], 16))
 | 
| +    elif groups['decimal']:
 | 
| +      return unichr(int(groups['decimal'], 10))
 | 
| +    else:
 | 
| +      name = groups['named']
 | 
| +      if name == 'nbsp' and not replace_nbsp:
 | 
| +        return match.group()  # Don't replace  
 | 
| +      assert name != None
 | 
| +      if name in htmlentitydefs.name2codepoint.keys():
 | 
| +        return unichr(htmlentitydefs.name2codepoint[name])
 | 
| +      else:
 | 
| +        return match.group()  # Unknown HTML character entity - don't replace
 | 
| +
 | 
| +  out = _HTML_ENTITY.sub(Replace, text)
 | 
| +  return out
 | 
| +
 | 
| +
 | 
| +def EncodeCdata(cdata):
 | 
| +  '''Returns the provided cdata in either escaped format or <![CDATA[xxx]]>
 | 
| +  format, depending on which is more appropriate for easy editing.  The data
 | 
| +  is escaped for inclusion in an XML element's body.
 | 
| +
 | 
| +  Args:
 | 
| +    cdata: 'If x < y and y < z then x < z'
 | 
| +
 | 
| +  Return:
 | 
| +    '<![CDATA[If x < y and y < z then x < z]]>'
 | 
| +  '''
 | 
| +  if cdata.count('<') > 1 or cdata.count('>') > 1 and cdata.count(']]>') == 0:
 | 
| +    return '<![CDATA[%s]]>' % cdata
 | 
| +  else:
 | 
| +    return saxutils.escape(cdata)
 | 
| +
 | 
| +
 | 
| +def FixupNamedParam(function, param_name, param_value):
 | 
| +  '''Returns a closure that is identical to 'function' but ensures that the
 | 
| +  named parameter 'param_name' is always set to 'param_value' unless explicitly
 | 
| +  set by the caller.
 | 
| +
 | 
| +  Args:
 | 
| +    function: callable
 | 
| +    param_name: 'bingo'
 | 
| +    param_value: 'bongo' (any type)
 | 
| +
 | 
| +  Return:
 | 
| +    callable
 | 
| +  '''
 | 
| +  def FixupClosure(*args, **kw):
 | 
| +    if not param_name in kw:
 | 
| +      kw[param_name] = param_value
 | 
| +    return function(*args, **kw)
 | 
| +  return FixupClosure
 | 
| +
 | 
| +
 | 
| +def PathFromRoot(path):
 | 
| +  '''Takes a path relative to the root directory for GRIT (the one that grit.py
 | 
| +  resides in) and returns a path that is either absolute or relative to the
 | 
| +  current working directory (i.e .a path you can use to open the file).
 | 
| +
 | 
| +  Args:
 | 
| +    path: 'rel_dir\file.ext'
 | 
| +
 | 
| +  Return:
 | 
| +    'c:\src\tools\rel_dir\file.ext
 | 
| +  '''
 | 
| +  return os.path.normpath(os.path.join(_root_dir, path))
 | 
| +
 | 
| +
 | 
| +def FixRootForUnittest(root_node, dir=PathFromRoot('.')):
 | 
| +  '''Adds a GetBaseDir() method to 'root_node', making unittesting easier.'''
 | 
| +  def GetBaseDir():
 | 
| +    '''Returns a fake base directory.'''
 | 
| +    return dir
 | 
| +  def GetSourceLanguage():
 | 
| +    return 'en'
 | 
| +  if not hasattr(root_node, 'GetBaseDir'):
 | 
| +    setattr(root_node, 'GetBaseDir', GetBaseDir)
 | 
| +    setattr(root_node, 'GetSourceLanguage', GetSourceLanguage)
 | 
| +
 | 
| +
 | 
| +def dirname(filename):
 | 
| +  '''Version of os.path.dirname() that never returns empty paths (returns
 | 
| +  '.' if the result of os.path.dirname() is empty).
 | 
| +  '''
 | 
| +  ret = os.path.dirname(filename)
 | 
| +  if ret == '':
 | 
| +    ret = '.'
 | 
| +  return ret
 | 
| +
 | 
| +
 | 
| +def normpath(path):
 | 
| +  '''Version of os.path.normpath that also changes backward slashes to
 | 
| +  forward slashes when not running on Windows.
 | 
| +  '''
 | 
| +  # This is safe to always do because the Windows version of os.path.normpath
 | 
| +  # will replace forward slashes with backward slashes.
 | 
| +  path = path.replace('\\', '/')
 | 
| +  return os.path.normpath(path)
 | 
| +
 | 
| +
 | 
| +_LANGUAGE_SPLIT_RE = re.compile('-|_|/')
 | 
| +
 | 
| +
 | 
| +def CanonicalLanguage(code):
 | 
| +  '''Canonicalizes two-part language codes by using a dash and making the
 | 
| +  second part upper case.  Returns one-part language codes unchanged.
 | 
| +
 | 
| +  Args:
 | 
| +    code: 'zh_cn'
 | 
| +
 | 
| +  Return:
 | 
| +    code: 'zh-CN'
 | 
| +  '''
 | 
| +  parts = _LANGUAGE_SPLIT_RE.split(code)
 | 
| +  code = [ parts[0] ]
 | 
| +  for part in parts[1:]:
 | 
| +    code.append(part.upper())
 | 
| +  return '-'.join(code)
 | 
| +
 | 
| +
 | 
| +_LANG_TO_CODEPAGE = {
 | 
| +  'en' : 1252,
 | 
| +  'fr' : 1252,
 | 
| +  'it' : 1252,
 | 
| +  'de' : 1252,
 | 
| +  'es' : 1252,
 | 
| +  'nl' : 1252,
 | 
| +  'sv' : 1252,
 | 
| +  'no' : 1252,
 | 
| +  'da' : 1252,
 | 
| +  'fi' : 1252,
 | 
| +  'pt-BR' : 1252,
 | 
| +  'ru' : 1251,
 | 
| +  'ja' : 932,
 | 
| +  'zh-TW' : 950,
 | 
| +  'zh-CN' : 936,
 | 
| +  'ko' : 949,
 | 
| +}
 | 
| +
 | 
| +
 | 
| +def LanguageToCodepage(lang):
 | 
| +  '''Returns the codepage _number_ that can be used to represent 'lang', which
 | 
| +  may be either in formats such as 'en', 'pt_br', 'pt-BR', etc.
 | 
| +
 | 
| +  The codepage returned will be one of the 'cpXXXX' codepage numbers.
 | 
| +
 | 
| +  Args:
 | 
| +    lang: 'de'
 | 
| +
 | 
| +  Return:
 | 
| +    1252
 | 
| +  '''
 | 
| +  lang = CanonicalLanguage(lang)
 | 
| +  if lang in _LANG_TO_CODEPAGE:
 | 
| +    return _LANG_TO_CODEPAGE[lang]
 | 
| +  else:
 | 
| +    print "Not sure which codepage to use for %s, assuming cp1252" % lang
 | 
| +    return 1252
 | 
| +
 | 
| +def NewClassInstance(class_name, class_type):
 | 
| +  '''Returns an instance of the class specified in classname
 | 
| +
 | 
| +  Args:
 | 
| +    class_name: the fully qualified, dot separated package + classname,
 | 
| +    i.e. "my.package.name.MyClass". Short class names are not supported.
 | 
| +    class_type: the class or superclass this object must implement
 | 
| +
 | 
| +  Return:
 | 
| +    An instance of the class, or None if none was found
 | 
| +  '''
 | 
| +  lastdot = class_name.rfind('.')
 | 
| +  module_name = ''
 | 
| +  if lastdot >= 0:
 | 
| +    module_name = class_name[0:lastdot]
 | 
| +    if module_name:
 | 
| +      class_name = class_name[lastdot+1:]
 | 
| +      module = __import__(module_name, globals(), locals(), [''])
 | 
| +      if hasattr(module, class_name):
 | 
| +        class_ = getattr(module, class_name)
 | 
| +        class_instance = class_()
 | 
| +        if isinstance(class_instance, class_type):
 | 
| +          return class_instance
 | 
| +  return None
 | 
| +
 | 
| +
 | 
| +def FixLineEnd(text, line_end):
 | 
| +  # First normalize
 | 
| +  text = text.replace('\r\n', '\n')
 | 
| +  text = text.replace('\r', '\n')
 | 
| +  # Then fix
 | 
| +  text = text.replace('\n', line_end)
 | 
| +  return text
 | 
| +
 | 
| +
 | 
| +def BoolToString(bool):
 | 
| +  if bool:
 | 
| +    return 'true'
 | 
| +  else:
 | 
| +    return 'false'
 | 
| +
 | 
| +
 | 
| +verbose = False
 | 
| +extra_verbose = False
 | 
| +
 | 
| +def IsVerbose():
 | 
| +  return verbose
 | 
| +
 | 
| +def IsExtraVerbose():
 | 
| +  return extra_verbose
 | 
| +
 | 
| +def GetCurrentYear():
 | 
| +  '''Returns the current 4-digit year as an integer.'''
 | 
| +  return time.localtime()[0]
 | 
| +
 | 
| 
 | 
| Property changes on: grit/util.py
 | 
| ___________________________________________________________________
 | 
| Added: svn:eol-style
 | 
|    + LF
 | 
| 
 | 
| 
 |