Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(91)

Side by Side Diff: fix_encoding.py

Issue 6676090: Add code to 'fix' python encoding and its unit test. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: Fix a comment Created 9 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « PRESUBMIT.py ('k') | gcl.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Collection of functions and classes to fix various encoding problems on
6 multiple platforms with python.
7 """
8
9 import codecs
10 import locale
11 import os
12 import sys
13
14
# Prevents initializing multiple times: fix_win_sys_argv() sets this to True
# once it has rewritten sys.argv, and returns early on any later call.
_SYS_ARGV_PROCESSED = False
17
18
def complain(message):
    """Reports 'message' on the original stderr, sys.__stderr__.

    If any exception occurs in this file, we'll probably try to print it on
    stderr, which makes for frustrating debugging if stderr is directed to our
    wrapper. So be paranoid about catching errors and reporting them to
    sys.__stderr__, so that the user has a higher chance to see them.

    Args:
      message: what to report. A str is written verbatim; any other object
          is written as its repr().
    """
    # A conditional expression rather than the 'x and a or b' trick: with the
    # latter an empty str is falsy and would be reported as its repr ("''").
    text = message if isinstance(message, str) else repr(message)
    # Same output as 'print >> sys.__stderr__, text', in a single write.
    sys.__stderr__.write(text + '\n')
27
28
def fix_default_encoding():
    """Forces utf8 solidly on all platforms.

    By default python execution environment is lazy and defaults to ascii
    encoding.

    http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/

    Besides the interpreter default encoding, every locale.LC_* category is
    switched to its UTF-8 variant when the platform accepts it; otherwise the
    matching environment variable is set to '<lang>.UTF-8'. Categories that
    cannot be initialized or queried are left alone.

    Returns:
      False if the default encoding already was utf-8 (nothing is touched),
      True otherwise.
    """
    if sys.getdefaultencoding() == 'utf-8':
        return False

    # Regenerate setdefaultencoding.
    reload(sys)
    # Module 'sys' has no 'setdefaultencoding' member
    # pylint: disable=E1101
    sys.setdefaultencoding('utf-8')
    for attr in dir(locale):
        if not attr.startswith('LC_'):
            continue
        aref = getattr(locale, attr)
        try:
            locale.setlocale(aref, '')
        except locale.Error:
            # The environment names a locale that is not available here.
            continue
        try:
            lang = locale.getlocale(aref)[0]
        except (TypeError, ValueError):
            # getlocale() refuses a composite LC_ALL setting (TypeError) and
            # locale names it cannot parse (ValueError).
            continue
        if lang is not None:
            try:
                locale.setlocale(aref, (lang, 'UTF-8'))
            except locale.Error:
                os.environ[attr] = lang + '.UTF-8'
    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error:
        # Best effort: the default encoding is already fixed at this point,
        # an unusable locale environment must not abort the caller.
        pass
    return True
58
59
60 ###############################
61 # Windows specific
62
63
def fix_win_sys_argv(encoding):
    """Rebuilds sys.argv from the process's wide-character command line.

    Each argument is encoded to 'encoding' (utf-8 is recommended), with
    characters that cannot be encoded replaced rather than raising.

    Works around <http://bugs.python.org/issue2128>.

    Returns:
      False if sys.argv was already processed by an earlier call, True once
      it has been replaced.
    """
    global _SYS_ARGV_PROCESSED
    if _SYS_ARGV_PROCESSED:
        return False

    from ctypes import byref, c_int, POINTER, windll, WINFUNCTYPE
    from ctypes.wintypes import LPCWSTR, LPWSTR

    # <http://msdn.microsoft.com/en-us/library/ms683156.aspx>
    get_command_line = WINFUNCTYPE(LPWSTR)(
        ('GetCommandLineW', windll.kernel32))
    # <http://msdn.microsoft.com/en-us/library/bb776391.aspx>
    split_command_line = WINFUNCTYPE(
        POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(
            ('CommandLineToArgvW', windll.shell32))

    arg_count = c_int(0)
    wide_args = split_command_line(get_command_line(), byref(arg_count))
    args = []
    for index in xrange(0, arg_count.value):
        args.append(wide_args[index].encode(encoding, 'replace'))

    if not hasattr(sys, 'frozen'):
        # An executable produced by py2exe or bbfreeze is invoked directly.
        # Otherwise the first entry is the Python interpreter: drop it.
        del args[:1]

        # Then drop the options that were meant for the interpreter itself.
        while args:
            head = args[0]
            if not head.startswith(u'-') or head == u'-':
                break
            args.pop(0)
            if head == u'-m':
                # sys.argv[0] should really be the absolute path of the
                # module source, but never mind.
                break
            if head == u'-c':
                # The command string that followed -c is replaced by '-c'.
                args[0] = u'-c'
                break

    sys.argv = args
    _SYS_ARGV_PROCESSED = True
    return True
112
113
def fix_win_codec():
    """Works around <http://bugs.python.org/issue6058>.

    Makes the codec name 'cp65001' resolve to the utf-8 codec when the
    interpreter does not know that name.

    Returns:
      False if 'cp65001' could already be looked up, True if a codec search
      function had to be registered for it.
    """
    # <http://msdn.microsoft.com/en-us/library/dd317756.aspx>
    def search_cp65001(name):
        # Codec search function: answers only for 'cp65001'.
        if name == 'cp65001':
            return codecs.lookup('utf-8')
        return None

    try:
        codecs.lookup('cp65001')
    except LookupError:
        codecs.register(search_cp65001)
        return True
    else:
        return False
124
125
class WinUnicodeOutputBase(object):
    """Common file-like skeleton for the sys.stdout / sys.stderr replacements
    used on Windows.

    Subclasses must implement flush() and write(). Setting encoding to utf-8
    is recommended.
    """
    def __init__(self, fileno, name, encoding):
        # Attributes a file object is expected to expose.
        self.name = name
        self.encoding = encoding
        self.mode = 'w'
        self.closed = False
        self.softspace = False

        # File number of the stream being stood in for; see fileno().
        self._fileno = fileno

    @staticmethod
    def isatty():
        """Always reports False."""
        return False

    def close(self):
        """Only records the closed state; really closing the handle would
        only cause problems."""
        self.closed = True

    def fileno(self):
        """Returns the file number given to the constructor."""
        return self._fileno

    def flush(self):
        """To be implemented by subclasses."""
        raise NotImplementedError()

    def write(self, text):
        """To be implemented by subclasses."""
        raise NotImplementedError()

    def writelines(self, lines):
        """Passes every item of 'lines' to write(), in order.

        Any exception is reported through complain() and then re-raised.
        """
        try:
            for item in lines:
                self.write(item)
        except Exception as e:
            complain('%s.writelines: %r' % (self.name, e))
            raise
166
167
class WinUnicodeConsoleOutput(WinUnicodeOutputBase):
    """Output adapter that writes straight to a Windows console.

    Text is sent through the win32 WriteConsoleW() function instead of the
    regular file API.
    """
    def __init__(self, console_handle, fileno, stream_name, encoding):
        super(WinUnicodeConsoleOutput, self).__init__(
            fileno, '<Unicode console %s>' % stream_name, encoding)

        # Everything ctypes-related is loaded here and kept on the instance.
        from ctypes import byref, GetLastError, POINTER, windll, WINFUNCTYPE
        from ctypes.wintypes import BOOL, DWORD, HANDLE, LPVOID, LPWSTR

        # <http://msdn.microsoft.com/en-us/library/ms687401.aspx>
        prototype = WINFUNCTYPE(
            BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID)
        self._WriteConsoleW = prototype(('WriteConsoleW', windll.kernel32))
        self._GetLastError = GetLastError
        self._byref = byref
        self._DWORD = DWORD

        # Handle to use for WriteConsoleW
        self._console_handle = console_handle

    def flush(self):
        """Does nothing: console writes are immediate."""
        pass

    def write(self, text):
        """Writes 'text' to the console, in chunks.

        Anything that is not a unicode object is converted with str() and
        decoded with self.encoding, undecodable bytes being replaced.

        Raises:
          IOError: WriteConsoleW() failed or wrote no character at all.
              Like any other exception it is first reported via complain().
        """
        try:
            if not isinstance(text, unicode):
                # Convert to unicode.
                text = str(text).decode(self.encoding, 'replace')
            pending = text
            while pending:
                written = self._DWORD(0)
                # There is a shorter-than-documented limitation on the length
                # of the string passed to WriteConsoleW. See
                # <http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232>.
                status = self._WriteConsoleW(
                    self._console_handle, pending,
                    min(len(pending), 10000),
                    self._byref(written), None)
                if status == 0 or written.value == 0:
                    raise IOError(
                        'WriteConsoleW returned %r, n.value = %r, '
                        'last error = %r' % (
                            status, written.value, self._GetLastError()))
                # Continue with whatever the console did not take this time.
                pending = pending[written.value:]
        except Exception as e:
            complain('%s.write: %r' % (self.name, e))
            raise
222
223
class WinUnicodeOutput(WinUnicodeOutputBase):
    """Output adaptor wrapping an existing stream on Windows, for output that
    does not go to a console.

    unicode text is encoded with self.encoding before it reaches the wrapped
    stream, so that characters which cannot be represented are replaced
    instead of making the write fail.
    """
    def __init__(self, stream, fileno, encoding):
        super(WinUnicodeOutput, self).__init__(
            fileno, '<Unicode redirected %s>' % stream.name, encoding)
        # Output stream
        self._stream = stream

        # Flush right now.
        self.flush()

    def flush(self):
        """Flushes the wrapped stream.

        Any exception is reported through complain() and then re-raised.
        """
        try:
            self._stream.flush()
        except Exception as e:
            complain('%s.flush: %r from %r' % (self.name, e, self._stream))
            raise

    def write(self, text):
        """Writes 'text' to the wrapped stream.

        A unicode object is encoded with self.encoding first; anything else
        is handed to the stream unchanged. Any exception is reported through
        complain() and then re-raised.
        """
        try:
            if isinstance(text, unicode):
                # Replace characters that cannot be printed instead of
                # failing.
                payload = text.encode(self.encoding, 'replace')
            else:
                payload = text
            self._stream.write(payload)
        except Exception as e:
            complain('%s.write: %r' % (self.name, e))
            raise
255
256
def win_handle_is_a_console(handle):
    """Returns True if a Windows file handle is a handle to a console.

    Args:
      handle: value as returned by GetStdHandle(): INVALID_HANDLE_VALUE, NULL
          (None) or a valid handle.

    Returns:
      True only when the handle is a character device (ignoring the
      FILE_TYPE_REMOTE bit) for which GetConsoleMode() succeeds.
    """
    from ctypes import byref, POINTER, windll, WINFUNCTYPE
    from ctypes.wintypes import BOOL, DWORD, HANDLE

    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    # A HANDLE is pointer-sized, so the sentinel is derived from HANDLE. The
    # 32-bit DWORD form is still recognized for callers that obtained the
    # handle through a DWORD-typed prototype.
    INVALID_HANDLE_VALUES = (HANDLE(-1).value, DWORD(-1).value)

    # <http://msdn.microsoft.com/en-us/library/ms683167.aspx>
    GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))(
        ('GetConsoleMode', windll.kernel32))
    # <http://msdn.microsoft.com/en-us/library/aa364960.aspx>
    # The argument is declared as HANDLE, not DWORD, so that a handle is not
    # narrowed to 32 bits on 64-bit Windows.
    GetFileType = WINFUNCTYPE(DWORD, HANDLE)(
        ('GetFileType', windll.kernel32))

    # GetStdHandle returns INVALID_HANDLE_VALUE, NULL, or a valid handle.
    if handle is None or handle in INVALID_HANDLE_VALUES:
        return False
    if (GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR:
        return False
    # GetConsoleMode() only succeeds on console handles; the mode it stores
    # is not needed.
    return bool(GetConsoleMode(handle, byref(DWORD())))
278
279
def win_get_unicode_stream(stream, excepted_fileno, output_handle, encoding):
    """Returns a unicode-compatible replacement for 'stream'.

    A WinUnicodeConsoleOutput, writing directly to the console, is returned
    only if:
    - stream.fileno() is the expected file number, 'excepted_fileno', and
    - the standard handle designated by 'output_handle' really is a handle
      to a console.
    In every other case 'stream' is wrapped in an auto-encoding
    WinUnicodeOutput.
    """
    def no_fileno():
        # Stands in for fileno() on streams that do not have one.
        return None

    current_fileno = getattr(stream, 'fileno', no_fileno)()
    if current_fileno == excepted_fileno:
        from ctypes import windll, WINFUNCTYPE
        from ctypes.wintypes import DWORD, HANDLE

        # <http://msdn.microsoft.com/en-us/library/ms683231.aspx>
        get_std_handle = WINFUNCTYPE(HANDLE, DWORD)(
            ('GetStdHandle', windll.kernel32))

        std_handle = get_std_handle(DWORD(output_handle))
        if win_handle_is_a_console(std_handle):
            # It's a console.
            return WinUnicodeConsoleOutput(
                std_handle, current_fileno, stream.name, encoding)

    # It's something else. Create an auto-encoding stream.
    return WinUnicodeOutput(stream, current_fileno, encoding)
304
305
def fix_win_console(encoding):
    """Makes Unicode console output work independently of the current code
    page, by replacing sys.stdout and sys.stderr.

    This also fixes <http://bugs.python.org/issue1602>.
    Credit to Michael Kaplan
    <http://blogs.msdn.com/b/michkap/archive/2010/04/07/9989346.aspx> and
    TZOmegaTZIOY
    <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>.

    Returns:
      False if sys.stdout or sys.stderr is already one of our wrappers, True
      otherwise - including when the replacement itself failed, in which case
      the exception is only reported through complain().
    """
    for current in (sys.stdout, sys.stderr):
        if isinstance(current, WinUnicodeOutputBase):
            return False

    # Arguments for GetStdHandle(): -11 selects standard output and -12
    # standard error.
    stdout_handle_id = -11
    stderr_handle_id = -12

    try:
        # SetConsoleCP and SetConsoleOutputCP could change the code page, but
        # that is of little use since the console is written to with
        # WriteConsoleW(). Moreover a code page change is 'permanent' to the
        # console and has to be reverted manually.
        # In practice one needs to set the console font to a TTF font to see
        # all the characters, which did not work out in practice either. In
        # any case printing won't throw any exception, which is the
        # important part.
        sys.stdout = win_get_unicode_stream(
            sys.stdout, 1, stdout_handle_id, encoding)
        sys.stderr = win_get_unicode_stream(
            sys.stderr, 2, stderr_handle_id, encoding)
        # TODO(maruel): Do sys.stdin with ReadConsoleW(). Albeit the
        # limitation is "It doesn't appear to be possible to read Unicode
        # characters in UTF-8 mode" and this appears to be a limitation of
        # cmd.exe.
    except Exception as e:
        complain('exception %r while fixing up sys.stdout and sys.stderr' % e)
    return True
336
337
def fix_encoding():
    """Applies every encoding fix relevant to the current platform.

    Should be called at the very beginning of the process.

    On win32 the cp65001 codec is fixed first, then the default encoding (on
    every platform), then - win32 again - sys.argv and the console streams,
    both using the resulting default encoding.

    Returns:
      True only if every fix that was attempted returned True.
    """
    on_windows = sys.platform == 'win32'
    outcomes = []

    if on_windows:
        outcomes.append(fix_win_codec())

    outcomes.append(fix_default_encoding())

    if on_windows:
        encoding = sys.getdefaultencoding()
        outcomes.append(fix_win_sys_argv(encoding))
        outcomes.append(fix_win_console(encoding))

    # Every fix above has already run; this only combines their results.
    return all(outcomes)
OLDNEW
« no previous file with comments | « PRESUBMIT.py ('k') | gcl.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698