Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1045)

Unified Diff: grit/util.py

Issue 7994004: Initial source commit to grit-i18n project. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/
Patch Set: Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « grit/tool/unit.py ('k') | grit/util_unittest.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: grit/util.py
===================================================================
--- grit/util.py (revision 0)
+++ grit/util.py (revision 0)
@@ -0,0 +1,310 @@
+#!/usr/bin/python2.4
+# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+'''Utilities used by GRIT.
+'''
+
+import sys
+import os.path
+import codecs
+import htmlentitydefs
+import re
+import time
+from xml.sax import saxutils
+
+# Normalized path of the directory one level above the directory containing
+# this module.  PathFromRoot() resolves its argument against this directory.
+_root_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))
+
+
+# Matches all of the resource IDs predefined by Windows.
+# The '\b' before and after each word makes sure these match only whole words
+# and not the beginning of a longer word, e.g. ID_FILE_NEW will not match
+# inside ID_FILE_NEW_PROJECT.  Every alternative, including the last one,
+# needs both boundaries.
+# See http://www.amk.ca/python/howto/regex/ (search for "\bclass\b" in the page).
+SYSTEM_IDENTIFIERS = re.compile(
+  r'''\bIDOK\b | \bIDCANCEL\b | \bIDC_STATIC\b | \bIDYES\b | \bIDNO\b |
+      \bID_FILE_NEW\b | \bID_FILE_OPEN\b | \bID_FILE_CLOSE\b | \bID_FILE_SAVE\b |
+      \bID_FILE_SAVE_AS\b | \bID_FILE_PAGE_SETUP\b | \bID_FILE_PRINT_SETUP\b |
+      \bID_FILE_PRINT\b | \bID_FILE_PRINT_DIRECT\b | \bID_FILE_PRINT_PREVIEW\b |
+      \bID_FILE_UPDATE\b | \bID_FILE_SAVE_COPY_AS\b | \bID_FILE_SEND_MAIL\b |
+      \bID_FILE_MRU_FIRST\b | \bID_FILE_MRU_LAST\b |
+      \bID_EDIT_CLEAR\b | \bID_EDIT_CLEAR_ALL\b | \bID_EDIT_COPY\b |
+      \bID_EDIT_CUT\b | \bID_EDIT_FIND\b | \bID_EDIT_PASTE\b | \bID_EDIT_PASTE_LINK\b |
+      \bID_EDIT_PASTE_SPECIAL\b | \bID_EDIT_REPEAT\b | \bID_EDIT_REPLACE\b |
+      \bID_EDIT_SELECT_ALL\b | \bID_EDIT_UNDO\b | \bID_EDIT_REDO\b |
+      \bVS_VERSION_INFO\b | \bIDRETRY\b''', re.VERBOSE)
+
+
+# Matches character entities, whether specified by name, decimal or hex.
+_HTML_ENTITY = re.compile(
+  '&(#(?P<decimal>[0-9]+)|#x(?P<hex>[a-fA-F0-9]+)|(?P<named>[a-z0-9]+));',
+  re.IGNORECASE)
+
+# Matches characters that should be HTML-escaped.  These are ", <, > and &, but
+# the & only if it is not the start of an HTML character entity.  The lookahead
+# mirrors _HTML_ENTITY: every form (decimal, hex or named) must be terminated
+# by ';' to count as an entity, hex digits are limited to 0-9 and a-f, and
+# entity names may contain digits (e.g. &sup2;).
+_HTML_CHARS_TO_ESCAPE = re.compile(
+  '"|<|>|&(?!(?:#[0-9]+|#x[0-9a-f]+|[a-z0-9]+);)',
+  re.IGNORECASE | re.MULTILINE)
+
+
+def WrapInputStream(stream, encoding='utf-8'):
+  '''Returns a stream that wraps the provided stream, making it read characters
+  using the specified encoding.
+
+  Args:
+    stream: file-like object to read encoded data from
+    encoding: 'utf-8' (any codec name known to the codecs module)
+
+  Return:
+    A codecs stream reader wrapping 'stream'.
+
+  Raises:
+    LookupError: if 'encoding' is not a known codec.
+  '''
+  # codecs.getreader() hands back the same reader factory as the third item of
+  # codecs.lookup(), without unpacking the three entries that are not needed.
+  return codecs.getreader(encoding)(stream)
+
+
+def WrapOutputStream(stream, encoding='utf-8'):
+  '''Returns a stream that wraps the provided stream, making it write
+  characters using the specified encoding.
+
+  Args:
+    stream: file-like object that receives the encoded data
+    encoding: 'utf-8' (any codec name known to the codecs module)
+
+  Return:
+    A codecs stream writer wrapping 'stream'.
+
+  Raises:
+    LookupError: if 'encoding' is not a known codec.
+  '''
+  # codecs.getwriter() hands back the same writer factory as the fourth item of
+  # codecs.lookup(), without unpacking the three entries that are not needed.
+  return codecs.getwriter(encoding)(stream)
+
+
+def ChangeStdoutEncoding(encoding='utf-8'):
+  '''Replaces sys.stdout with a wrapper around the current sys.stdout that
+  writes characters using the specified encoding.
+
+  Args:
+    encoding: 'utf-8'
+  '''
+  encoded_stdout = WrapOutputStream(sys.stdout, encoding)
+  sys.stdout = encoded_stdout
+
+
+def EscapeHtml(text, escape_quotes=False):
+  '''Escapes <, > and & (and " as well when 'escape_quotes' is true) in 'text'
+  as named HTML entities.  Any existing named entity or HTML entity defined by
+  decimal or hex code is left untouched.  The result is appropriate for
+  inclusion in HTML, but not in XML.
+
+  Args:
+    text: 'a < b & "c"'
+    escape_quotes: whether " should be turned into &quot;
+
+  Return:
+    'a &lt; b &amp; "c"'
+  '''
+  entities = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;'}
+
+  def Replace(match):
+    char = match.group()
+    # The pattern always matches double quotes; hand them back unchanged
+    # unless the caller asked for them to be escaped.
+    if char == '"' and not escape_quotes:
+      return char
+    assert char in entities
+    return entities[char]
+
+  return _HTML_CHARS_TO_ESCAPE.sub(Replace, text)
+
+
+def UnescapeHtml(text, replace_nbsp=True):
+  '''Returns 'text' with all HTML character entities (both named character
+  entities and those specified by decimal or hexadecimal Unicode ordinal)
+  replaced by the characters they stand for.
+
+  Entities that cannot be converted are left exactly as written: unknown
+  entity names, numeric references that unichr() rejects, and &nbsp; when
+  'replace_nbsp' is False.
+
+  Args:
+    text: 'a &lt; b&#33;'
+    replace_nbsp: whether &nbsp; should be replaced as well
+
+  Return:
+    'a < b!'
+  '''
+  def Replace(match):
+    groups = match.groupdict()
+    if groups['hex']:
+      codepoint = int(groups['hex'], 16)
+    elif groups['decimal']:
+      codepoint = int(groups['decimal'], 10)
+    else:
+      name = groups['named']
+      if name == 'nbsp' and not replace_nbsp:
+        return match.group()  # Don't replace &nbsp;
+      assert name is not None
+      # Test membership on the dict itself; .keys() would build a list and
+      # scan it linearly for every entity found.
+      if name not in htmlentitydefs.name2codepoint:
+        return match.group()  # Unknown HTML character entity - don't replace
+      codepoint = htmlentitydefs.name2codepoint[name]
+    try:
+      return unichr(codepoint)
+    except (ValueError, OverflowError):
+      # The ordinal is outside the range unichr() accepts - don't replace.
+      return match.group()
+
+  return _HTML_ENTITY.sub(Replace, text)
+
+
+def EncodeCdata(cdata):
+  '''Returns the provided cdata in either escaped format or <![CDATA[xxx]]>
+  format, depending on which is more appropriate for easy editing.  The data
+  is escaped for inclusion in an XML element's body.
+
+  The CDATA format is chosen when the text contains more than one '<' or more
+  than one '>', unless it contains ']]>', which would end the CDATA section
+  early; such text is always returned in escaped format.
+
+  Args:
+    cdata: 'If x < y and y < z then x < z'
+
+  Return:
+    '<![CDATA[If x < y and y < z then x < z]]>'
+  '''
+  has_many_brackets = cdata.count('<') > 1 or cdata.count('>') > 1
+  # 'and' binds tighter than 'or', so the ']]>' check has to be applied to the
+  # combined bracket test, not only to the '>' count.
+  if has_many_brackets and ']]>' not in cdata:
+    return '<![CDATA[%s]]>' % cdata
+  return saxutils.escape(cdata)
+
+
+def FixupNamedParam(function, param_name, param_value):
+  '''Wraps 'function' in a closure that behaves identically, except that the
+  named parameter 'param_name' is passed as 'param_value' whenever the caller
+  does not set it explicitly.
+
+  Args:
+    function: callable
+    param_name: 'bingo'
+    param_value: 'bongo' (any type)
+
+  Return:
+    callable
+  '''
+  def FixupClosure(*args, **kw):
+    # setdefault() fills the value in only when the caller did not pass it.
+    kw.setdefault(param_name, param_value)
+    return function(*args, **kw)
+  return FixupClosure
+
+
+def PathFromRoot(path):
+  r'''Takes a path relative to the root directory for GRIT (the one that grit.py
+  resides in) and returns a normalized path that is either absolute or relative
+  to the current working directory (i.e. a path you can use to open the file).
+
+  Args:
+    path: 'rel_dir\file.ext'
+
+  Return:
+    'c:\src\tools\rel_dir\file.ext'
+  '''
+  joined = os.path.join(_root_dir, path)
+  return os.path.normpath(joined)
+
+
+def FixRootForUnittest(root_node, dir=PathFromRoot('.')):
+ '''Adds fake GetBaseDir() and GetSourceLanguage() methods to 'root_node',
+ making unittesting easier.
+
+ Args:
+ root_node: object to patch; it is modified in place
+ dir: path the fake GetBaseDir() returns; the default value is computed
+ once, when this function is defined
+ '''
+ def GetBaseDir():
+ '''Returns a fake base directory.'''
+ return dir
+ def GetSourceLanguage():
+ '''Returns a fake source language, always 'en'.'''
+ return 'en'
+ # A GetBaseDir() that 'root_node' already has is never overwritten.
+ # NOTE(review): indentation is not visible in this rendering, so it is unclear
+ # whether the GetSourceLanguage assignment is also guarded by this check -
+ # confirm against the real file.
+ if not hasattr(root_node, 'GetBaseDir'):
+ setattr(root_node, 'GetBaseDir', GetBaseDir)
+ setattr(root_node, 'GetSourceLanguage', GetSourceLanguage)
+
+
+def dirname(filename):
+  '''Like os.path.dirname(), except that '.' is returned in the cases where
+  os.path.dirname() would return an empty path.
+
+  Args:
+    filename: 'file.ext'
+
+  Return:
+    '.'
+  '''
+  directory = os.path.dirname(filename)
+  if directory != '':
+    return directory
+  return '.'
+
+
+def normpath(path):
+  '''Like os.path.normpath(), except that backward slashes are also turned into
+  forward slashes when not running on Windows.
+  '''
+  # Converting unconditionally is safe: the Windows version of
+  # os.path.normpath() turns forward slashes back into backward slashes.
+  return os.path.normpath(path.replace('\\', '/'))
+
+
+_LANGUAGE_SPLIT_RE = re.compile('-|_|/')
+
+
+def CanonicalLanguage(code):
+  '''Canonicalizes multi-part language codes: the parts, which may be separated
+  by '-', '_' or '/', are joined with dashes and every part after the first is
+  made upper case.  One-part language codes are returned unchanged.
+
+  Args:
+    code: 'zh_cn'
+
+  Return:
+    code: 'zh-CN'
+  '''
+  parts = _LANGUAGE_SPLIT_RE.split(code)
+  canonical = parts[:1] + [part.upper() for part in parts[1:]]
+  return '-'.join(canonical)
+
+
+# Maps language codes, in the form produced by CanonicalLanguage(), to codepage
+# numbers.  LanguageToCodepage() looks languages up here and assumes 1252 for
+# any language that is not listed.
+_LANG_TO_CODEPAGE = {
+ 'en' : 1252,
+ 'fr' : 1252,
+ 'it' : 1252,
+ 'de' : 1252,
+ 'es' : 1252,
+ 'nl' : 1252,
+ 'sv' : 1252,
+ 'no' : 1252,
+ 'da' : 1252,
+ 'fi' : 1252,
+ 'pt-BR' : 1252,
+ 'ru' : 1251,
+ 'ja' : 932,
+ 'zh-TW' : 950,
+ 'zh-CN' : 936,
+ 'ko' : 949,
+}
+
+
+def LanguageToCodepage(lang):
+  '''Returns the codepage _number_ that can be used to represent 'lang', which
+  may be given in any of the formats 'en', 'pt_br', 'pt-BR', etc.
+
+  The number returned is the XXXX of one of the 'cpXXXX' codepages.  For a
+  language without a known codepage a notice is printed and 1252 is returned.
+
+  Args:
+    lang: 'de'
+
+  Return:
+    1252
+  '''
+  canonical = CanonicalLanguage(lang)
+  try:
+    return _LANG_TO_CODEPAGE[canonical]
+  except KeyError:
+    print("Not sure which codepage to use for %s, assuming cp1252" % canonical)
+    return 1252
+
+def NewClassInstance(class_name, class_type):
+  '''Imports the module named in 'class_name' and returns a new instance of the
+  class, created without constructor arguments.
+
+  Args:
+    class_name: the fully qualified, dot separated package + classname,
+      i.e. "my.package.name.MyClass".  Short class names are not supported.
+    class_type: the class or superclass this object must implement
+
+  Return:
+    An instance of the class, or None if 'class_name' has no module part, the
+    module has no attribute of that name, or the instance created is not an
+    instance of 'class_type'.
+  '''
+  lastdot = class_name.rfind('.')
+  # No dot at all, or a leading dot, leaves no module name to import.
+  if lastdot <= 0:
+    return None
+  module_name = class_name[:lastdot]
+  short_name = class_name[lastdot + 1:]
+  module = __import__(module_name, globals(), locals(), [''])
+  if not hasattr(module, short_name):
+    return None
+  # The type can only be checked on the finished instance.
+  instance = getattr(module, short_name)()
+  if isinstance(instance, class_type):
+    return instance
+  return None
+
+
+def FixLineEnd(text, line_end):
+  r'''Returns 'text' with every line ending, whether '\r\n', '\r' or '\n',
+  replaced by 'line_end'.
+
+  Args:
+    text: 'one\r\ntwo\rthree\n'
+    line_end: '\n'
+
+  Return:
+    'one\ntwo\nthree\n'
+  '''
+  # Normalize everything to '\n' first ('\r\n' before the lone '\r'), then
+  # switch to the requested line ending.
+  normalized = text.replace('\r\n', '\n').replace('\r', '\n')
+  return normalized.replace('\n', line_end)
+
+
+def BoolToString(bool):
+  '''Returns 'true' if 'bool' evaluates as true and 'false' otherwise.'''
+  if not bool:
+    return 'false'
+  return 'true'
+
+
+# Module-level verbosity flags, reported by IsVerbose() and IsExtraVerbose().
+# Nothing in this file changes them after this point; presumably other code
+# sets them - TODO confirm.
+verbose = False
+extra_verbose = False
+
+def IsVerbose():
+ '''Returns the current value of the module-level 'verbose' flag.'''
+ return verbose
+
+def IsExtraVerbose():
+ '''Returns the current value of the module-level 'extra_verbose' flag.'''
+ return extra_verbose
+
+def GetCurrentYear():
+  '''Returns the current year, in local time, as a 4-digit integer.'''
+  return time.localtime().tm_year
+
Property changes on: grit/util.py
___________________________________________________________________
Added: svn:eol-style
+ LF
« no previous file with comments | « grit/tool/unit.py ('k') | grit/util_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698