grit/util.py - Issue 7994004: Initial source commit to grit-i18n project.

Side by Side Diff: grit/util.py

Issue 7994004: Initial source commit to grit-i18n project. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/

Patch Set: Created 9 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 #!/usr/bin/python2.4

	2 # Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6 '''Utilities used by GRIT.

	7 '''

	8

	9 import sys

	10 import os.path

	11 import codecs

	12 import htmlentitydefs

	13 import re

	14 import time

	15 from xml.sax import saxutils

	16

	17 _root_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))

	18

	19

	20 # Matches all of the resource IDs predefined by Windows.

	21 # The '\b' before and after each word makes sure these match only whole words an d

	22 # not the beginning of any word.. eg. ID_FILE_NEW will not match ID_FILE_NEW_PRO JECT

	23 # see http://www.amk.ca/python/howto/regex/ (search for "\bclass\b" inside the h tml page)

	24 SYSTEM_IDENTIFIERS = re.compile(

	25 r'''\bIDOK\b \| \bIDCANCEL\b \| \bIDC_STATIC\b \| \bIDYES\b \| \bIDNO\b \|

	26 \bID_FILE_NEW\b \| \bID_FILE_OPEN\b \| \bID_FILE_CLOSE\b \| \bID_FILE_SAVE\b \|

	27 \bID_FILE_SAVE_AS\b \| \bID_FILE_PAGE_SETUP\b \| \bID_FILE_PRINT_SETUP\b \|

	28 \bID_FILE_PRINT\b \| \bID_FILE_PRINT_DIRECT\b \| \bID_FILE_PRINT_PREVIEW\b \|

	29 \bID_FILE_UPDATE\b \| \bID_FILE_SAVE_COPY_AS\b \| \bID_FILE_SEND_MAIL\b \|

	30 \bID_FILE_MRU_FIRST\b \| \bID_FILE_MRU_LAST\b \|

	31 \bID_EDIT_CLEAR\b \| \bID_EDIT_CLEAR_ALL\b \| \bID_EDIT_COPY\b \|

	32 \bID_EDIT_CUT\b \| \bID_EDIT_FIND\b \| \bID_EDIT_PASTE\b \| \bID_EDIT_PASTE_L INK\b \|

	33 \bID_EDIT_PASTE_SPECIAL\b \| \bID_EDIT_REPEAT\b \| \bID_EDIT_REPLACE\b \|

	34 \bID_EDIT_SELECT_ALL\b \| \bID_EDIT_UNDO\b \| \bID_EDIT_REDO\b \|

	35 \bVS_VERSION_INFO\b \| \bIDRETRY''', re.VERBOSE);

	36

	37

	38 # Matches character entities, whether specified by name, decimal or hex.

	39 _HTML_ENTITY = re.compile(

	40 '&(#(?P<decimal>[0-9]+)\|#x(?P<hex>[a-fA-F0-9]+)\|(?P<named>[a-z0-9]+));',

	41 re.IGNORECASE)

	42

	43 # Matches characters that should be HTML-escaped. This is <, > and &, but only

	44 # if the & is not the start of an HTML character entity.

	45 _HTML_CHARS_TO_ESCAPE = re.compile('"\|<\|>\|&(?!#[0-9]+\|#x[0-9a-z]+\|[a-z]+;)',

	46 re.IGNORECASE \| re.MULTILINE)

	47

	48

	49 def WrapInputStream(stream, encoding = 'utf-8'):

	50 '''Returns a stream that wraps the provided stream, making it read characters

	51 using the specified encoding.'''

	52 (e, d, sr, sw) = codecs.lookup(encoding)

	53 return sr(stream)

	54

	55

	56 def WrapOutputStream(stream, encoding = 'utf-8'):

	57 '''Returns a stream that wraps the provided stream, making it write

	58 characters using the specified encoding.'''

	59 (e, d, sr, sw) = codecs.lookup(encoding)

	60 return sw(stream)

	61

	62

	63 def ChangeStdoutEncoding(encoding = 'utf-8'):

	64 '''Changes STDOUT to print characters using the specified encoding.'''

	65 sys.stdout = WrapOutputStream(sys.stdout, encoding)

	66

	67

	68 def EscapeHtml(text, escape_quotes = False):

	69 '''Returns 'text' with <, > and & (and optionally ") escaped to named HTML

	70 entities. Any existing named entity or HTML entity defined by decimal or

	71 hex code will be left untouched. This is appropriate for escaping text for

	72 inclusion in HTML, but not for XML.

	73 '''

	74 def Replace(match):

	75 if match.group() == '&': return '&'

	76 elif match.group() == '<': return '<'

	77 elif match.group() == '>': return '>'

	78 elif match.group() == '"':

	79 if escape_quotes: return '"'

	80 else: return match.group()

	81 else: assert False

	82 out = _HTML_CHARS_TO_ESCAPE.sub(Replace, text)

	83 return out

	84

	85

	86 def UnescapeHtml(text, replace_nbsp=True):

	87 '''Returns 'text' with all HTML character entities (both named character

	88 entities and those specified by decimal or hexadecimal Unicode ordinal)

	89 replaced by their Unicode characters (or latin1 characters if possible).

	90

	91 The only exception is that   will not be escaped if 'replace_nbsp' is

	92 False.

	93 '''

	94 def Replace(match):

	95 groups = match.groupdict()

	96 if groups['hex']:

	97 return unichr(int(groups['hex'], 16))

	98 elif groups['decimal']:

	99 return unichr(int(groups['decimal'], 10))

	100 else:

	101 name = groups['named']

	102 if name == 'nbsp' and not replace_nbsp:

	103 return match.group() # Don't replace

	104 assert name != None

	105 if name in htmlentitydefs.name2codepoint.keys():

	106 return unichr(htmlentitydefs.name2codepoint[name])

	107 else:

	108 return match.group() # Unknown HTML character entity - don't replace

	109

	110 out = _HTML_ENTITY.sub(Replace, text)

	111 return out

	112

	113

	114 def EncodeCdata(cdata):

	115 '''Returns the provided cdata in either escaped format or <![CDATA[xxx]]>

	116 format, depending on which is more appropriate for easy editing. The data

	117 is escaped for inclusion in an XML element's body.

	118

	119 Args:

	120 cdata: 'If x < y and y < z then x < z'

	121

	122 Return:

	123 '<![CDATA[If x < y and y < z then x < z]]>'

	124 '''

	125 if cdata.count('<') > 1 or cdata.count('>') > 1 and cdata.count(']]>') == 0:

	126 return '<![CDATA[%s]]>' % cdata

	127 else:

	128 return saxutils.escape(cdata)

	129

	130

	131 def FixupNamedParam(function, param_name, param_value):

	132 '''Returns a closure that is identical to 'function' but ensures that the

	133 named parameter 'param_name' is always set to 'param_value' unless explicitly

	134 set by the caller.

	135

	136 Args:

	137 function: callable

	138 param_name: 'bingo'

	139 param_value: 'bongo' (any type)

	140

	141 Return:

	142 callable

	143 '''

	144 def FixupClosure(args, *kw):

	145 if not param_name in kw:

	146 kw[param_name] = param_value

	147 return function(args, *kw)

	148 return FixupClosure

	149

	150

	151 def PathFromRoot(path):

	152 '''Takes a path relative to the root directory for GRIT (the one that grit.py

	153 resides in) and returns a path that is either absolute or relative to the

	154 current working directory (i.e .a path you can use to open the file).

	155

	156 Args:

	157 path: 'rel_dir\file.ext'

	158

	159 Return:

	160 'c:\src\tools\rel_dir\file.ext

	161 '''

	162 return os.path.normpath(os.path.join(_root_dir, path))

	163

	164

	165 def FixRootForUnittest(root_node, dir=PathFromRoot('.')):

	166 '''Adds a GetBaseDir() method to 'root_node', making unittesting easier.'''

	167 def GetBaseDir():

	168 '''Returns a fake base directory.'''

	169 return dir

	170 def GetSourceLanguage():

	171 return 'en'

	172 if not hasattr(root_node, 'GetBaseDir'):

	173 setattr(root_node, 'GetBaseDir', GetBaseDir)

	174 setattr(root_node, 'GetSourceLanguage', GetSourceLanguage)

	175

	176

	177 def dirname(filename):

	178 '''Version of os.path.dirname() that never returns empty paths (returns

	179 '.' if the result of os.path.dirname() is empty).

	180 '''

	181 ret = os.path.dirname(filename)

	182 if ret == '':

	183 ret = '.'

	184 return ret

	185

	186

	187 def normpath(path):

	188 '''Version of os.path.normpath that also changes backward slashes to

	189 forward slashes when not running on Windows.

	190 '''

	191 # This is safe to always do because the Windows version of os.path.normpath

	192 # will replace forward slashes with backward slashes.

	193 path = path.replace('\\', '/')

	194 return os.path.normpath(path)

	195

	196

	197 _LANGUAGE_SPLIT_RE = re.compile('-\|_\|/')

	198

	199

	200 def CanonicalLanguage(code):

	201 '''Canonicalizes two-part language codes by using a dash and making the

	202 second part upper case. Returns one-part language codes unchanged.

	203

	204 Args:

	205 code: 'zh_cn'

	206

	207 Return:

	208 code: 'zh-CN'

	209 '''

	210 parts = _LANGUAGE_SPLIT_RE.split(code)

	211 code = [ parts[0] ]

	212 for part in parts[1:]:

	213 code.append(part.upper())

	214 return '-'.join(code)

	215

	216

	217 _LANG_TO_CODEPAGE = {

	218 'en' : 1252,

	219 'fr' : 1252,

	220 'it' : 1252,

	221 'de' : 1252,

	222 'es' : 1252,

	223 'nl' : 1252,

	224 'sv' : 1252,

	225 'no' : 1252,

	226 'da' : 1252,

	227 'fi' : 1252,

	228 'pt-BR' : 1252,

	229 'ru' : 1251,

	230 'ja' : 932,

	231 'zh-TW' : 950,

	232 'zh-CN' : 936,

	233 'ko' : 949,

	234 }

	235

	236

	237 def LanguageToCodepage(lang):

	238 '''Returns the codepage _number_ that can be used to represent 'lang', which

	239 may be either in formats such as 'en', 'pt_br', 'pt-BR', etc.

	240

	241 The codepage returned will be one of the 'cpXXXX' codepage numbers.

	242

	243 Args:

	244 lang: 'de'

	245

	246 Return:

	247 1252

	248 '''

	249 lang = CanonicalLanguage(lang)

	250 if lang in _LANG_TO_CODEPAGE:

	251 return _LANG_TO_CODEPAGE[lang]

	252 else:

	253 print "Not sure which codepage to use for %s, assuming cp1252" % lang

	254 return 1252

	255

	256 def NewClassInstance(class_name, class_type):

	257 '''Returns an instance of the class specified in classname

	258

	259 Args:

	260 class_name: the fully qualified, dot separated package + classname,

	261 i.e. "my.package.name.MyClass". Short class names are not supported.

	262 class_type: the class or superclass this object must implement

	263

	264 Return:

	265 An instance of the class, or None if none was found

	266 '''

	267 lastdot = class_name.rfind('.')

	268 module_name = ''

	269 if lastdot >= 0:

	270 module_name = class_name[0:lastdot]

	271 if module_name:

	272 class_name = class_name[lastdot+1:]

	273 module = __import__(module_name, globals(), locals(), [''])

	274 if hasattr(module, class_name):

	275 class_ = getattr(module, class_name)

	276 class_instance = class_()

	277 if isinstance(class_instance, class_type):

	278 return class_instance

	279 return None

	280

	281

	282 def FixLineEnd(text, line_end):

	283 # First normalize

	284 text = text.replace('\r\n', '\n')

	285 text = text.replace('\r', '\n')

	286 # Then fix

	287 text = text.replace('\n', line_end)

	288 return text

	289

	290

	291 def BoolToString(bool):

	292 if bool:

	293 return 'true'

	294 else:

	295 return 'false'

	296

	297

	298 verbose = False

	299 extra_verbose = False

	300

	301 def IsVerbose():

	302 return verbose

	303

	304 def IsExtraVerbose():

	305 return extra_verbose

	306

	307 def GetCurrentYear():

	308 '''Returns the current 4-digit year as an integer.'''

	309 return time.localtime()[0]

	310

OLD	NEW

« no previous file with comments | « grit/tool/unit.py ('k') | grit/util_unittest.py » ('j') | no next file with comments »