OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python2.4 |
| 2 # Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 '''Utilities used by GRIT. |
| 7 ''' |
| 8 |
| 9 import sys |
| 10 import os.path |
| 11 import codecs |
| 12 import htmlentitydefs |
| 13 import re |
| 14 import time |
| 15 from xml.sax import saxutils |
| 16 |
# Parent of the directory that holds this module, in normalized form.
# PathFromRoot() joins relative paths onto it.
_root_dir = os.path.normpath(
    os.path.join(os.path.dirname(__file__), '..'))
| 18 |
| 19 |
# Matches all of the resource IDs predefined by Windows.
# The '\b' before and after each word makes sure these match only whole words
# and not the beginning of any word, e.g. ID_FILE_NEW will not match
# ID_FILE_NEW_PROJECT.
# See http://www.amk.ca/python/howto/regex/ (search for "\bclass\b" inside the
# html page).
# The pattern is compiled with re.VERBOSE, so the whitespace and line breaks
# between the alternatives are not part of the pattern.
SYSTEM_IDENTIFIERS = re.compile(
  r'''\bIDOK\b | \bIDCANCEL\b | \bIDC_STATIC\b | \bIDYES\b | \bIDNO\b |
      \bID_FILE_NEW\b | \bID_FILE_OPEN\b | \bID_FILE_CLOSE\b |
      \bID_FILE_SAVE\b | \bID_FILE_SAVE_AS\b | \bID_FILE_PAGE_SETUP\b |
      \bID_FILE_PRINT_SETUP\b | \bID_FILE_PRINT\b | \bID_FILE_PRINT_DIRECT\b |
      \bID_FILE_PRINT_PREVIEW\b | \bID_FILE_UPDATE\b |
      \bID_FILE_SAVE_COPY_AS\b | \bID_FILE_SEND_MAIL\b |
      \bID_FILE_MRU_FIRST\b | \bID_FILE_MRU_LAST\b |
      \bID_EDIT_CLEAR\b | \bID_EDIT_CLEAR_ALL\b | \bID_EDIT_COPY\b |
      \bID_EDIT_CUT\b | \bID_EDIT_FIND\b | \bID_EDIT_PASTE\b |
      \bID_EDIT_PASTE_LINK\b | \bID_EDIT_PASTE_SPECIAL\b |
      \bID_EDIT_REPEAT\b | \bID_EDIT_REPLACE\b | \bID_EDIT_SELECT_ALL\b |
      \bID_EDIT_UNDO\b | \bID_EDIT_REDO\b |
      \bVS_VERSION_INFO\b | \bIDRETRY\b''', re.VERBOSE)
| 36 |
| 37 |
# Matches character entities, whether specified by name, decimal or hex.
_HTML_ENTITY = re.compile(
  '&(#(?P<decimal>[0-9]+)|#x(?P<hex>[a-fA-F0-9]+)|(?P<named>[a-z0-9]+));',
  re.IGNORECASE)

# Matches characters that should be HTML-escaped.  This is ", <, > and &, but
# only if the & is not the start of an HTML character entity.
# The alternatives inside the lookahead are grouped so that the terminating
# ';' is required for every kind of entity (decimal, hex and named) rather
# than only for the last alternative, and the named form accepts digits so
# that it agrees with _HTML_ENTITY (e.g. '&sup2;' is left alone).
_HTML_CHARS_TO_ESCAPE = re.compile(
  '"|<|>|&(?!(?:#[0-9]+|#x[0-9a-f]+|[a-z0-9]+);)',
  re.IGNORECASE | re.MULTILINE)
| 47 |
| 48 |
def WrapInputStream(stream, encoding = 'utf-8'):
  '''Wraps 'stream' in the stream reader registered for 'encoding', so that
  data read through the returned object is decoded with that encoding.

  Args:
    stream: file-like object to read from
    encoding: 'utf-8'

  Return:
    The wrapping stream reader
  '''
  reader_factory = codecs.getreader(encoding)
  return reader_factory(stream)
| 54 |
| 55 |
def WrapOutputStream(stream, encoding = 'utf-8'):
  '''Wraps 'stream' in the stream writer registered for 'encoding', so that
  characters written through the returned object are encoded with that
  encoding before reaching 'stream'.

  Args:
    stream: file-like object to write to
    encoding: 'utf-8'

  Return:
    The wrapping stream writer
  '''
  writer_factory = codecs.getwriter(encoding)
  return writer_factory(stream)
| 61 |
| 62 |
def ChangeStdoutEncoding(encoding = 'utf-8'):
  '''Replaces sys.stdout with a wrapper (built by WrapOutputStream) that
  encodes everything written to it using 'encoding'.'''
  encoded_stdout = WrapOutputStream(sys.stdout, encoding)
  sys.stdout = encoded_stdout
| 66 |
| 67 |
def EscapeHtml(text, escape_quotes = False):
  '''Returns 'text' with <, > and & (and optionally ") escaped to named HTML
  entities.  Any existing named entity or HTML entity defined by decimal or
  hex code will be left untouched.  This is appropriate for escaping text for
  inclusion in HTML, but not for XML.

  Args:
    text: 'a < b &amp; "c"'
    escape_quotes: if True, " is replaced by &quot; as well

  Return:
    'a &lt; b &amp; "c"'
  '''
  replacements = {
    '&' : '&amp;',
    '<' : '&lt;',
    '>' : '&gt;',
  }
  if escape_quotes:
    replacements['"'] = '&quot;'

  def Replace(match):
    char = match.group()
    # _HTML_CHARS_TO_ESCAPE always matches '"'; it is only rewritten when the
    # caller asked for quotes to be escaped, otherwise it is passed through.
    return replacements.get(char, char)

  return _HTML_CHARS_TO_ESCAPE.sub(Replace, text)
| 84 |
| 85 |
def UnescapeHtml(text, replace_nbsp=True):
  '''Returns 'text' with all HTML character entities (both named character
  entities and those specified by decimal or hexadecimal Unicode ordinal)
  replaced by their Unicode characters (or latin1 characters if possible).

  The only exception is that &nbsp; will not be unescaped if 'replace_nbsp' is
  False.  Named entities that are not listed in htmlentitydefs.name2codepoint
  are left untouched.

  Args:
    text: 'a &lt; b &#65; &#x42;'
    replace_nbsp: False to keep '&nbsp;' as it is

  Return:
    'a < b A B'
  '''
  def Replace(match):
    groups = match.groupdict()
    if groups['hex']:
      return unichr(int(groups['hex'], 16))
    if groups['decimal']:
      return unichr(int(groups['decimal'], 10))

    name = groups['named']
    if name == 'nbsp' and not replace_nbsp:
      return match.group()  # Don't replace
    # Direct dictionary lookup; avoids building the list of all entity names
    # for every match.
    codepoint = htmlentitydefs.name2codepoint.get(name)
    if codepoint is None:
      return match.group()  # Unknown HTML character entity - don't replace
    return unichr(codepoint)

  return _HTML_ENTITY.sub(Replace, text)
| 112 |
| 113 |
def EncodeCdata(cdata):
  '''Returns the provided cdata in either escaped format or <![CDATA[xxx]]>
  format, depending on which is more appropriate for easy editing.  The data
  is escaped for inclusion in an XML element's body.

  The CDATA form is used when the text contains more than one '<' or more
  than one '>', but never when the text contains ']]>', because that sequence
  would terminate the CDATA section early.

  Args:
    cdata: 'If x < y and y < z then x < z'

  Return:
    '<![CDATA[If x < y and y < z then x < z]]>'
  '''
  many_angle_brackets = cdata.count('<') > 1 or cdata.count('>') > 1
  # The ']]>' check must guard both halves of the condition above.
  if many_angle_brackets and ']]>' not in cdata:
    return '<![CDATA[%s]]>' % cdata
  return saxutils.escape(cdata)
| 129 |
| 130 |
def FixupNamedParam(function, param_name, param_value):
  '''Wraps 'function' in a closure that supplies 'param_value' for the keyword
  argument 'param_name' whenever the caller does not pass that keyword
  argument.  All other arguments are forwarded unchanged.

  Args:
    function: callable
    param_name: 'bingo'
    param_value: 'bongo' (any type)

  Return:
    callable
  '''
  def FixupClosure(*args, **kw):
    # Only fills in the value when the caller left the keyword out.
    kw.setdefault(param_name, param_value)
    return function(*args, **kw)
  return FixupClosure
| 149 |
| 150 |
def PathFromRoot(path):
  r'''Takes a path relative to the root directory for GRIT (the one that
  grit.py resides in) and returns a path that is either absolute or relative
  to the current working directory (i.e. a path you can use to open the file).

  Args:
    path: 'rel_dir\file.ext'

  Return:
    'c:\src\tools\rel_dir\file.ext'
  '''
  joined = os.path.join(_root_dir, path)
  return os.path.normpath(joined)
| 163 |
| 164 |
def FixRootForUnittest(root_node, dir=PathFromRoot('.')):
  '''Gives 'root_node' fake GetBaseDir() and GetSourceLanguage() methods,
  making unittesting easier.  Nothing is changed if 'root_node' already has a
  GetBaseDir attribute.

  Args:
    root_node: object to patch
    dir: the directory the fake GetBaseDir() returns
  '''
  # NOTE(review): assumes both attributes are only installed when GetBaseDir
  # is missing - confirm against the original indentation.
  if hasattr(root_node, 'GetBaseDir'):
    return

  def GetBaseDir():
    '''Returns a fake base directory.'''
    return dir

  def GetSourceLanguage():
    '''Returns a fake source language.'''
    return 'en'

  root_node.GetBaseDir = GetBaseDir
  root_node.GetSourceLanguage = GetSourceLanguage
| 175 |
| 176 |
def dirname(filename):
  '''Like os.path.dirname(), except that an empty result is replaced by '.',
  so the returned path is never empty.
  '''
  parent = os.path.dirname(filename)
  if parent != '':
    return parent
  return '.'
| 185 |
| 186 |
def normpath(path):
  '''Like os.path.normpath(), but backward slashes in 'path' are first turned
  into forward slashes, so that Windows-style paths also work when not
  running on Windows.
  '''
  # Converting unconditionally is fine: on Windows, os.path.normpath turns
  # the forward slashes back into backward slashes.
  return os.path.normpath(path.replace('\\', '/'))
| 195 |
| 196 |
# Separators accepted between the parts of a language code.
_LANGUAGE_SPLIT_RE = re.compile('-|_|/')


def CanonicalLanguage(code):
  '''Canonicalizes a language code: the parts are joined with dashes and every
  part after the first is converted to upper case.  One-part language codes
  are returned unchanged.

  Args:
    code: 'zh_cn'

  Return:
    code: 'zh-CN'
  '''
  pieces = _LANGUAGE_SPLIT_RE.split(code)
  # split() always yields at least one piece; only the later ones change case.
  return '-'.join(pieces[:1] + [piece.upper() for piece in pieces[1:]])
| 215 |
| 216 |
# Codepage numbers keyed by canonical language code (see CanonicalLanguage).
_LANG_TO_CODEPAGE = {
  # cp1252
  'en' : 1252,
  'fr' : 1252,
  'it' : 1252,
  'de' : 1252,
  'es' : 1252,
  'nl' : 1252,
  'sv' : 1252,
  'no' : 1252,
  'da' : 1252,
  'fi' : 1252,
  'pt-BR' : 1252,
  # cp1251
  'ru' : 1251,
  # East Asian codepages
  'ja' : 932,
  'zh-TW' : 950,
  'zh-CN' : 936,
  'ko' : 949,
}


def LanguageToCodepage(lang):
  '''Returns the codepage _number_ that can be used to represent 'lang', which
  may be either in formats such as 'en', 'pt_br', 'pt-BR', etc.

  The codepage returned will be one of the 'cpXXXX' codepage numbers.  For a
  language that is not in the table a message is printed and 1252 is
  returned.

  Args:
    lang: 'de'

  Return:
    1252
  '''
  lang = CanonicalLanguage(lang)
  try:
    return _LANG_TO_CODEPAGE[lang]
  except KeyError:
    print("Not sure which codepage to use for %s, assuming cp1252" % lang)
    return 1252
| 255 |
def NewClassInstance(class_name, class_type):
  '''Imports the module named by 'class_name', instantiates the class with no
  arguments and returns the instance if it is an instance of 'class_type'.

  Args:
    class_name: the fully qualified, dot separated package + classname,
    i.e. "my.package.name.MyClass". Short class names are not supported.
    class_type: the class or superclass this object must implement

  Return:
    An instance of the class, or None if none was found
  '''
  dot_index = class_name.rfind('.')
  if dot_index <= 0:
    # Either there is no dot at all or nothing precedes it, so there is no
    # module to import.
    return None

  module_name = class_name[:dot_index]
  short_name = class_name[dot_index + 1:]
  # The non-empty 'fromlist' makes __import__ return the innermost module
  # instead of the top-level package.
  module = __import__(module_name, globals(), locals(), [''])
  if not hasattr(module, short_name):
    return None

  instance = getattr(module, short_name)()
  if isinstance(instance, class_type):
    return instance
  return None
| 280 |
| 281 |
def FixLineEnd(text, line_end):
  '''Returns 'text' with every line ending ('\\r\\n', '\\r' or '\\n')
  replaced by 'line_end'.
  '''
  # Collapse all line endings to '\n' first, then expand to the requested one.
  normalized = text.replace('\r\n', '\n').replace('\r', '\n')
  return normalized.replace('\n', line_end)
| 289 |
| 290 |
def BoolToString(bool):
  '''Returns 'true' if 'bool' evaluates as true and 'false' otherwise.'''
  if not bool:
    return 'false'
  return 'true'
| 296 |
| 297 |
# Module-level verbosity flag, read through IsVerbose().
verbose = False

def IsVerbose():
  '''Returns the current value of the module-level 'verbose' flag.'''
  return verbose

# Module-level extra-verbosity flag, read through IsExtraVerbose().
extra_verbose = False

def IsExtraVerbose():
  '''Returns the current value of the module-level 'extra_verbose' flag.'''
  return extra_verbose
| 306 |
def GetCurrentYear():
  '''Returns the current 4-digit year (local time) as an integer.'''
  now = time.localtime()
  return now.tm_year
| 310 |
OLD | NEW |