Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(988)

Side by Side Diff: grit/util.py

Issue 7994004: Initial source commit to grit-i18n project. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/
Patch Set: Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « grit/tool/unit.py ('k') | grit/util_unittest.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 #!/usr/bin/python2.4
2 # Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 '''Utilities used by GRIT.
7 '''
8
9 import sys
10 import os.path
11 import codecs
12 import htmlentitydefs
13 import re
14 import time
15 from xml.sax import saxutils
16
# Root directory for GRIT: the normalized parent of the directory that
# contains this module. PathFromRoot() resolves paths relative to it.
_root_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))
18
19
# Matches all of the resource IDs predefined by Windows.
# The '\b' before and after each word makes sure these match only whole words
# and not the beginning of any longer word, e.g. ID_FILE_NEW will not match
# ID_FILE_NEW_PROJECT.
# See http://www.amk.ca/python/howto/regex/ (search for "\bclass\b" inside the
# HTML page).
# The pattern is compiled with re.VERBOSE, so the whitespace and line breaks
# between the alternatives are not part of the expression.
SYSTEM_IDENTIFIERS = re.compile(
    r'''\bIDOK\b | \bIDCANCEL\b | \bIDC_STATIC\b | \bIDYES\b | \bIDNO\b |
        \bID_FILE_NEW\b | \bID_FILE_OPEN\b | \bID_FILE_CLOSE\b |
        \bID_FILE_SAVE\b | \bID_FILE_SAVE_AS\b | \bID_FILE_PAGE_SETUP\b |
        \bID_FILE_PRINT_SETUP\b | \bID_FILE_PRINT\b |
        \bID_FILE_PRINT_DIRECT\b | \bID_FILE_PRINT_PREVIEW\b |
        \bID_FILE_UPDATE\b | \bID_FILE_SAVE_COPY_AS\b |
        \bID_FILE_SEND_MAIL\b | \bID_FILE_MRU_FIRST\b |
        \bID_FILE_MRU_LAST\b |
        \bID_EDIT_CLEAR\b | \bID_EDIT_CLEAR_ALL\b | \bID_EDIT_COPY\b |
        \bID_EDIT_CUT\b | \bID_EDIT_FIND\b | \bID_EDIT_PASTE\b |
        \bID_EDIT_PASTE_LINK\b | \bID_EDIT_PASTE_SPECIAL\b |
        \bID_EDIT_REPEAT\b | \bID_EDIT_REPLACE\b |
        \bID_EDIT_SELECT_ALL\b | \bID_EDIT_UNDO\b | \bID_EDIT_REDO\b |
        \bVS_VERSION_INFO\b | \bIDRETRY\b''', re.VERBOSE)
36
37
# Matches character entities, whether specified by name, decimal or hex.
# Exactly one of the named groups 'decimal', 'hex' or 'named' captures the
# entity body (without the leading '&', '#' / '#x' or the trailing ';').
# Matching is case-insensitive.
_HTML_ENTITY = re.compile(
  '&(#(?P<decimal>[0-9]+)|#x(?P<hex>[a-fA-F0-9]+)|(?P<named>[a-z0-9]+));',
  re.IGNORECASE)
42
# Matches characters that should be HTML-escaped. This is ", <, > and &, but
# only if the & is not the start of an HTML character entity.
# NOTE(review): inside the negative lookahead the ';' binds only to the last
# alternative ([a-z]+;). A '&' followed by '#123' or '#x1f' is therefore left
# alone even without a terminating ';', and a named entity containing digits
# (e.g. '&frac12;') is not recognized as an entity. Confirm whether this is
# intended before tightening the pattern.
_HTML_CHARS_TO_ESCAPE = re.compile('"|<|>|&(?!#[0-9]+|#x[0-9a-z]+|[a-z]+;)',
                                   re.IGNORECASE | re.MULTILINE)
47
48
def WrapInputStream(stream, encoding = 'utf-8'):
  '''Wraps 'stream' in a reader that decodes its contents using 'encoding'.

  Args:
    stream: file-like object to read encoded data from
    encoding: 'utf-8'

  Return:
    The stream reader registered for 'encoding', wrapping 'stream'
  '''
  reader_factory = codecs.getreader(encoding)
  return reader_factory(stream)
54
55
def WrapOutputStream(stream, encoding = 'utf-8'):
  '''Wraps 'stream' in a writer that encodes characters using 'encoding'.

  Args:
    stream: file-like object that receives the encoded data
    encoding: 'utf-8'

  Return:
    The stream writer registered for 'encoding', wrapping 'stream'
  '''
  writer_factory = codecs.getwriter(encoding)
  return writer_factory(stream)
61
62
def ChangeStdoutEncoding(encoding = 'utf-8'):
  '''Replaces sys.stdout with a wrapper around the current sys.stdout that
  encodes written characters using 'encoding'.'''
  encoded_stdout = WrapOutputStream(sys.stdout, encoding)
  sys.stdout = encoded_stdout
66
67
def EscapeHtml(text, escape_quotes = False):
  '''Escapes <, > and & in 'text' to the named HTML entities, and also " when
  'escape_quotes' is true. An & that already starts a character entity (as
  recognized by _HTML_CHARS_TO_ESCAPE) is left alone. The result is meant for
  inclusion in HTML, not in XML.

  Args:
    text: 'a < b & c'
    escape_quotes: whether " should become &quot;

  Return:
    'a &lt; b &amp; c'
  '''
  # The pattern also matches '"', so it needs a replacement either way;
  # without escape_quotes it maps to itself.
  replacements = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '"'}
  if escape_quotes:
    replacements['"'] = '&quot;'

  def Replace(match):
    return replacements[match.group()]

  return _HTML_CHARS_TO_ESCAPE.sub(Replace, text)
84
85
def UnescapeHtml(text, replace_nbsp=True):
  '''Returns 'text' with all HTML character entities (both named character
  entities and those specified by decimal or hexadecimal Unicode ordinal)
  replaced by the corresponding Unicode characters.

  Entities that cannot be converted are left untouched: unknown entity names,
  numeric references that unichr() rejects as out of range, and &nbsp; when
  'replace_nbsp' is False.

  Args:
    text: 'a &lt; b'
    replace_nbsp: whether &nbsp; should be replaced as well

  Return:
    'a < b'
  '''
  def Replace(match):
    groups = match.groupdict()
    if groups['hex']:
      codepoint = int(groups['hex'], 16)
    elif groups['decimal']:
      codepoint = int(groups['decimal'], 10)
    else:
      name = groups['named']
      if name == 'nbsp' and not replace_nbsp:
        return match.group()  # Don't replace &nbsp;
      # Direct dictionary lookup instead of scanning the list of keys.
      codepoint = htmlentitydefs.name2codepoint.get(name)
      if codepoint is None:
        return match.group()  # Unknown HTML character entity - don't replace
    try:
      return unichr(codepoint)
    except (ValueError, OverflowError):
      # The ordinal is outside the range unichr() supports; keep the entity
      # as written rather than aborting the whole substitution.
      return match.group()

  return _HTML_ENTITY.sub(Replace, text)
112
113
def EncodeCdata(cdata):
  '''Returns the provided cdata in either escaped format or <![CDATA[xxx]]>
  format, depending on which is more appropriate for easy editing. The data
  is escaped for inclusion in an XML element's body.

  The CDATA form is used only when the text contains more than one '<' or more
  than one '>' and does not contain ']]>' (which would terminate the CDATA
  section early and produce malformed XML). Everything else is XML-escaped.

  Args:
    cdata: 'If x < y and y < z then x < z'

  Return:
    '<![CDATA[If x < y and y < z then x < z]]>'
  '''
  has_many_brackets = cdata.count('<') > 1 or cdata.count('>') > 1
  # The ']]>' check must guard both bracket conditions, not just the '>' one.
  if has_many_brackets and ']]>' not in cdata:
    return '<![CDATA[%s]]>' % cdata
  return saxutils.escape(cdata)
129
130
def FixupNamedParam(function, param_name, param_value):
  '''Wraps 'function' so that the keyword argument 'param_name' defaults to
  'param_value' whenever the caller does not pass it by keyword. All other
  arguments are forwarded unchanged.

  Args:
    function: callable
    param_name: 'bingo'
    param_value: 'bongo' (any type)

  Return:
    callable
  '''
  def FixupClosure(*args, **kw):
    # Only fills the value in when the caller did not supply the keyword.
    kw.setdefault(param_name, param_value)
    return function(*args, **kw)
  return FixupClosure
149
150
def PathFromRoot(path):
  r'''Resolves 'path', given relative to the root directory for GRIT (the one
  that grit.py resides in), into a normalized path that is either absolute or
  relative to the current working directory (i.e. a path you can use to open
  the file).

  Args:
    path: 'rel_dir\file.ext'

  Return:
    'c:\src\tools\rel_dir\file.ext'
  '''
  joined = os.path.join(_root_dir, path)
  return os.path.normpath(joined)
163
164
def FixRootForUnittest(root_node, dir=PathFromRoot('.')):
  '''Patches 'root_node' with the accessors that unit tests rely on.

  GetBaseDir(), returning 'dir', is only added when 'root_node' has no
  attribute of that name yet. GetSourceLanguage(), returning 'en', is always
  installed and replaces any existing one.
  '''
  def GetSourceLanguage():
    return 'en'
  def GetBaseDir():
    '''Returns a fake base directory.'''
    return dir
  # Keep an existing GetBaseDir, but always override GetSourceLanguage.
  if not hasattr(root_node, 'GetBaseDir'):
    root_node.GetBaseDir = GetBaseDir
  root_node.GetSourceLanguage = GetSourceLanguage
175
176
def dirname(filename):
  '''Like os.path.dirname(), except that an empty result is reported as '.'
  so the caller always gets a usable directory path.
  '''
  parent = os.path.dirname(filename)
  if parent != '':
    return parent
  return '.'
185
186
def normpath(path):
  '''Like os.path.normpath(), but backward slashes are also treated as path
  separators when not running on Windows.
  '''
  # Converting to forward slashes first is always safe: on Windows,
  # os.path.normpath() turns forward slashes back into backward slashes.
  forward_slashed = path.replace('\\', '/')
  return os.path.normpath(forward_slashed)
195
196
# Separators accepted between the parts of a language code.
_LANGUAGE_SPLIT_RE = re.compile('-|_|/')


def CanonicalLanguage(code):
  '''Canonicalizes a language code: the parts are joined with dashes and every
  part after the first is upper-cased. The first part is kept as given, so
  one-part language codes are returned unchanged.

  Args:
    code: 'zh_cn'

  Return:
    code: 'zh-CN'
  '''
  parts = _LANGUAGE_SPLIT_RE.split(code)
  canonical = parts[:1] + [part.upper() for part in parts[1:]]
  return '-'.join(canonical)
215
216
# Maps language codes, in the canonical form produced by CanonicalLanguage(),
# to codepage numbers. LanguageToCodepage() falls back to 1252 for languages
# that are not listed here.
_LANG_TO_CODEPAGE = {
  'en' : 1252,
  'fr' : 1252,
  'it' : 1252,
  'de' : 1252,
  'es' : 1252,
  'nl' : 1252,
  'sv' : 1252,
  'no' : 1252,
  'da' : 1252,
  'fi' : 1252,
  'pt-BR' : 1252,
  'ru' : 1251,
  'ja' : 932,
  'zh-TW' : 950,
  'zh-CN' : 936,
  'ko' : 949,
}
235
236
237 def LanguageToCodepage(lang):
238 '''Returns the codepage _number_ that can be used to represent 'lang', which
239 may be either in formats such as 'en', 'pt_br', 'pt-BR', etc.
240
241 The codepage returned will be one of the 'cpXXXX' codepage numbers.
242
243 Args:
244 lang: 'de'
245
246 Return:
247 1252
248 '''
249 lang = CanonicalLanguage(lang)
250 if lang in _LANG_TO_CODEPAGE:
251 return _LANG_TO_CODEPAGE[lang]
252 else:
253 print "Not sure which codepage to use for %s, assuming cp1252" % lang
254 return 1252
255
def NewClassInstance(class_name, class_type):
  '''Imports the module named in 'class_name', instantiates the class with no
  arguments and returns the instance if it is of type 'class_type'.

  Args:
    class_name: the fully qualified, dot separated package + classname,
    i.e. "my.package.name.MyClass". Short class names are not supported.
    class_type: the class or superclass this object must implement

  Return:
    An instance of the class, or None if the name has no module part, the
    module has no such attribute, or the instance is not a 'class_type'
  '''
  lastdot = class_name.rfind('.')
  if lastdot < 0:
    return None
  module_name = class_name[:lastdot]
  if not module_name:
    return None
  short_name = class_name[lastdot + 1:]
  # The non-empty fromlist makes __import__ return the innermost module
  # rather than the top-level package.
  module = __import__(module_name, globals(), locals(), [''])
  if not hasattr(module, short_name):
    return None
  instance = getattr(module, short_name)()
  if isinstance(instance, class_type):
    return instance
  return None
280
281
def FixLineEnd(text, line_end):
  '''Returns 'text' with every line ending ('\\r\\n', '\\r' or '\\n') replaced
  by 'line_end'.
  '''
  # Normalize everything to '\n' first, '\r\n' before the lone '\r' so that a
  # Windows line ending is not counted twice.
  normalized = text.replace('\r\n', '\n').replace('\r', '\n')
  return normalized.replace('\n', line_end)
289
290
def BoolToString(bool):
  '''Returns 'true' when 'bool' is truthy and 'false' otherwise.'''
  if not bool:
    return 'false'
  return 'true'
296
297
# Module-level verbosity flags, reported by IsVerbose() and IsExtraVerbose().
# Both start out as False.
# NOTE(review): presumably set by the command-line front end - verify against
# callers.
verbose = False
extra_verbose = False
300
def IsVerbose():
  '''Returns the current value of the module-level 'verbose' flag.'''
  return verbose
303
def IsExtraVerbose():
  '''Returns the current value of the module-level 'extra_verbose' flag.'''
  return extra_verbose
306
def GetCurrentYear():
  '''Returns the current year in local time, as a 4-digit integer.'''
  now = time.localtime()
  return now.tm_year
310
OLDNEW
« no previous file with comments | « grit/tool/unit.py ('k') | grit/util_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698