Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(266)

Side by Side Diff: fix_encoding.py

Issue 6721029: Fix locale.getlocale() exception. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: Created 9 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « PRESUBMIT.py ('k') | gcl.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Collection of functions and classes to fix various encoding problems on
6 multiple platforms with python.
7 """
8
9 import codecs
10 import locale
11 import os
12 import sys
13
14
15 # Prevents initializing multiple times.
16 _SYS_ARGV_PROCESSED = False
17
18
def complain(message):
  """Reports a problem on the original stderr, bypassing any wrapper.

  If any exception occurs in this file, we'll probably try to print it on
  stderr, which makes for frustrating debugging if stderr is directed to our
  wrapper. So the message is written to sys.__stderr__ instead, so that the
  user has a higher chance to see it.

  Args:
    message: what to report. A str is written as is; any other object is
        written as its repr().
  """
  # A conditional expression is used instead of the 'x and y or z' idiom,
  # which would have printed repr('') for an empty message.
  text = message if isinstance(message, str) else repr(message)
  sys.__stderr__.write(text + '\n')
27
28
def fix_default_encoding():
  """Forces utf8 solidly on all platforms.

  By default python execution environment is lazy and defaults to ascii
  encoding.

  http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/

  Every locale category (the LC_* constants of the locale module) is also
  switched to its UTF-8 variant when possible. Categories that cannot be
  queried or changed are skipped instead of aborting the whole fix-up.

  Returns:
    False if the default encoding was already utf-8 (nothing is touched),
    True otherwise.
  """
  if sys.getdefaultencoding() == 'utf-8':
    return False

  # Regenerate setdefaultencoding.
  reload(sys)
  # Module 'sys' has no 'setdefaultencoding' member
  # pylint: disable=E1101
  sys.setdefaultencoding('utf-8')
  for attr in dir(locale):
    if attr[0:3] != 'LC_':
      continue
    aref = getattr(locale, attr)
    try:
      # Load the user's preferred setting for this category.
      locale.setlocale(aref, '')
    except locale.Error:
      # The environment requests a locale the system does not support.
      continue
    try:
      lang = locale.getlocale(aref)[0]
    except (TypeError, ValueError):
      # getlocale() fails on locale names it cannot parse.
      continue
    if lang:
      try:
        locale.setlocale(aref, (lang, 'UTF-8'))
      except locale.Error:
        # Fall back on the environment; it is re-read by the final
        # setlocale() call below.
        os.environ[attr] = lang + '.UTF-8'
  try:
    locale.setlocale(locale.LC_ALL, '')
  except locale.Error:
    # Best effort: keep whatever locale is currently active.
    pass
  return True
61
62
63 ###############################
64 # Windows specific
65
66
def fix_win_sys_argv(encoding):
  """Converts sys.argv to 'encoding' encoded string.

  utf-8 is recommended.

  Works around <http://bugs.python.org/issue2128>.

  The command line is fetched again as unicode through the win32 API, the
  interpreter and its own options are stripped, and the remaining arguments
  are encoded with 'encoding' (unencodable characters are replaced).

  Args:
    encoding: name of the codec used to encode each argument.

  Returns:
    True if sys.argv was replaced. False if it was already processed by a
    previous call or if the command line could not be parsed.
  """
  global _SYS_ARGV_PROCESSED
  if _SYS_ARGV_PROCESSED:
    return False

  from ctypes import byref, c_int, POINTER, windll, WINFUNCTYPE
  from ctypes.wintypes import LPCWSTR, LPWSTR

  # <http://msdn.microsoft.com/en-us/library/ms683156.aspx>
  GetCommandLineW = WINFUNCTYPE(LPWSTR)(('GetCommandLineW', windll.kernel32))
  # <http://msdn.microsoft.com/en-us/library/bb776391.aspx>
  CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(
      ('CommandLineToArgvW', windll.shell32))

  argc = c_int(0)
  argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))
  if not argv_unicode:
    # CommandLineToArgvW returns NULL on failure; leave sys.argv untouched.
    return False

  # Keep the arguments as unicode while they are inspected so that they are
  # only ever compared with unicode literals; encoding happens once, at the
  # end, which also keeps every item of sys.argv of the same type.
  argv = [argv_unicode[i] for i in xrange(0, argc.value)]

  if not hasattr(sys, 'frozen'):
    # If this is an executable produced by py2exe or bbfreeze, then it
    # will have been invoked directly. Otherwise, argv[0] is the Python
    # interpreter, so skip that.
    argv = argv[1:]

    # Also skip option arguments to the Python interpreter.
    while len(argv) > 0:
      arg = argv[0]
      if not arg.startswith(u'-') or arg == u'-':
        break
      argv = argv[1:]
      if arg == u'-m':
        # sys.argv[0] should really be the absolute path of the
        # module source, but never mind.
        break
      if arg == u'-c':
        # The code string that follows -c is replaced by '-c' itself.
        argv[0] = u'-c'
        break

  sys.argv = [arg.encode(encoding, 'replace') for arg in argv]
  _SYS_ARGV_PROCESSED = True
  return True
115
116
def fix_win_codec():
  """Makes the 'cp65001' codec name resolve to the utf-8 codec.

  Works around <http://bugs.python.org/issue6058>.

  Returns:
    False if 'cp65001' could already be looked up, True if a codec search
    function mapping it to utf-8 was registered.
  """
  # <http://msdn.microsoft.com/en-us/library/dd317756.aspx>
  def search_cp65001(name):
    """Codec search function answering only for 'cp65001'."""
    return codecs.lookup('utf-8') if name == 'cp65001' else None

  try:
    codecs.lookup('cp65001')
  except LookupError:
    codecs.register(search_cp65001)
    return True
  return False
127
128
class WinUnicodeOutputBase(object):
  """Shared file-like scaffolding for the sys.stdout / sys.stderr adapters
  used on Windows.

  Subclasses have to implement flush() and write(). Setting encoding to utf-8
  is recommended.
  """
  def __init__(self, fileno, name, encoding):
    # Attributes exposed the way a file object exposes them.
    self.name = name
    self.encoding = encoding
    self.mode = 'w'
    self.softspace = False
    self.closed = False

    # Corresponding file number, returned by fileno().
    self._fileno = fileno

  @staticmethod
  def isatty():
    """Always answers False."""
    return False

  def close(self):
    """Flags the stream as closed; the underlying handle is left alone."""
    # Really closing the handle would only cause problems.
    self.closed = True

  def fileno(self):
    """Returns the file number given at construction."""
    return self._fileno

  def flush(self):
    """To be implemented by subclasses."""
    raise NotImplementedError()

  def write(self, text):
    """To be implemented by subclasses."""
    raise NotImplementedError()

  def writelines(self, lines):
    """Writes each item of 'lines' with write().

    Any exception is reported through complain() and then re-raised.
    """
    try:
      for item in lines:
        self.write(item)
    except Exception as e:
      complain('%s.writelines: %r' % (self.name, e))
      raise
169
170
class WinUnicodeConsoleOutput(WinUnicodeOutputBase):
  """Output adapter writing straight to a Windows Console.

  Text is sent through the win32 WriteConsoleW() API.
  """
  def __init__(self, console_handle, fileno, stream_name, encoding):
    super(WinUnicodeConsoleOutput, self).__init__(
        fileno, '<Unicode console %s>' % stream_name, encoding)
    # Handle to use for WriteConsoleW
    self._console_handle = console_handle

    # ctypes is only imported here so that the module still loads on
    # platforms without the win32 API.
    from ctypes import byref, GetLastError, POINTER, windll, WINFUNCTYPE
    from ctypes.wintypes import BOOL, DWORD, HANDLE, LPVOID, LPWSTR

    # <http://msdn.microsoft.com/en-us/library/ms687401.aspx>
    prototype = WINFUNCTYPE(
        BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID)
    self._WriteConsoleW = prototype(('WriteConsoleW', windll.kernel32))
    self._GetLastError = GetLastError

    # Kept on the instance because write() needs them on every call.
    self._byref = byref
    self._DWORD = DWORD

  def flush(self):
    """Does nothing: writes to the console are immediate."""
    pass

  def write(self, text):
    """Writes 'text' to the console, in chunks of at most 10000 characters.

    Anything that is not unicode is converted with str() and decoded with
    self.encoding, replacing undecodable bytes. Any exception, including the
    IOError raised when WriteConsoleW fails or writes nothing, is reported
    through complain() and then re-raised.
    """
    try:
      if not isinstance(text, unicode):
        # Convert to unicode.
        text = str(text).decode(self.encoding, 'replace')
      while text:
        written = self._DWORD(0)
        # There is a shorter-than-documented limitation on the length of the
        # string passed to WriteConsoleW. See
        # <http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232>.
        chunk_size = min(len(text), 10000)
        succeeded = self._WriteConsoleW(
            self._console_handle, text, chunk_size,
            self._byref(written), None)
        if succeeded == 0 or written.value == 0:
          raise IOError(
              'WriteConsoleW returned %r, n.value = %r, last error = %r' % (
                  succeeded, written.value, self._GetLastError()))
        # Drop what was actually written and go on with the rest.
        text = text[written.value:]
    except Exception as e:
      complain('%s.write: %r' % (self.name, e))
      raise
225
226
class WinUnicodeOutput(WinUnicodeOutputBase):
  """Output adapter wrapping an existing stream on Windows.

  Unicode text is encoded with the configured encoding before it is handed to
  the wrapped stream, so that it does not get encoded in the current code
  page.
  """
  def __init__(self, stream, fileno, encoding):
    adapter_name = '<Unicode redirected %s>' % stream.name
    super(WinUnicodeOutput, self).__init__(fileno, adapter_name, encoding)
    # Output stream
    self._stream = stream

    # Flush right now.
    self.flush()

  def flush(self):
    """Flushes the wrapped stream.

    Any exception is reported through complain() and then re-raised.
    """
    try:
      self._stream.flush()
    except Exception as e:
      complain('%s.flush: %r from %r' % (self.name, e, self._stream))
      raise

  def write(self, text):
    """Writes 'text' to the wrapped stream.

    Unicode text is encoded with self.encoding first; anything else is passed
    through untouched. Any exception is reported through complain() and then
    re-raised.
    """
    try:
      payload = text
      if isinstance(payload, unicode):
        # Replace characters that cannot be printed instead of failing.
        payload = payload.encode(self.encoding, 'replace')
      self._stream.write(payload)
    except Exception as e:
      complain('%s.write: %r' % (self.name, e))
      raise
258
259
def win_handle_is_a_console(handle):
  """Returns True if a Windows file handle is a handle to a console.

  Args:
    handle: a value as returned by GetStdHandle(): INVALID_HANDLE_VALUE, None
        (NULL) or a valid handle.

  Returns:
    True if the handle is a character device for which GetConsoleMode()
    succeeds, False otherwise.
  """
  # GetStdHandle returns INVALID_HANDLE_VALUE, NULL, or a valid handle.
  if handle is None:
    return False

  from ctypes.wintypes import BOOL, DWORD, HANDLE

  # INVALID_HANDLE_VALUE is (HANDLE)-1, i.e. pointer sized, so DWORD(-1) alone
  # does not match it on 64-bit Windows. The DWORD flavor is still accepted
  # for callers that obtained the handle through a 32-bit return type.
  if handle in (HANDLE(-1).value, DWORD(-1).value):
    return False

  # The API functions are only bound once there is a handle to query.
  from ctypes import byref, POINTER, windll, WINFUNCTYPE

  FILE_TYPE_CHAR = 0x0002
  FILE_TYPE_REMOTE = 0x8000

  # <http://msdn.microsoft.com/en-us/library/ms683167.aspx>
  GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))(
      ('GetConsoleMode', windll.kernel32))
  # <http://msdn.microsoft.com/en-us/library/aa364960.aspx>
  # The parameter is a HANDLE; declaring it as a DWORD would truncate it on
  # 64-bit Windows.
  GetFileType = WINFUNCTYPE(DWORD, HANDLE)(('GetFileType', windll.kernel32))

  return bool(
      (GetFileType(handle) & ~FILE_TYPE_REMOTE) == FILE_TYPE_CHAR and
      GetConsoleMode(handle, byref(DWORD())))
281
282
def win_get_unicode_stream(stream, excepted_fileno, output_handle, encoding):
  """Wraps 'stream' into a unicode-compatible stream.

  A WinUnicodeConsoleOutput, writing directly to the console, is returned
  only if:
  - stream.fileno() is the expected file number 'excepted_fileno', and
  - the handle GetStdHandle() returns for 'output_handle' is in fact a handle
    to a console.
  In every other case the stream is wrapped in an auto-encoding
  WinUnicodeOutput.
  """
  # A stream without a fileno() method is handled as file number None.
  get_fileno = getattr(stream, 'fileno', lambda: None)
  old_fileno = get_fileno()
  if old_fileno != excepted_fileno:
    # Not the standard stream that was expected. Create an auto-encoding
    # stream.
    return WinUnicodeOutput(stream, old_fileno, encoding)

  from ctypes import windll, WINFUNCTYPE
  from ctypes.wintypes import DWORD, HANDLE

  # <http://msdn.microsoft.com/en-us/library/ms683231.aspx>
  prototype = WINFUNCTYPE(HANDLE, DWORD)
  GetStdHandle = prototype(('GetStdHandle', windll.kernel32))

  real_output_handle = GetStdHandle(DWORD(output_handle))
  if not win_handle_is_a_console(real_output_handle):
    # It's something else. Create an auto-encoding stream.
    return WinUnicodeOutput(stream, old_fileno, encoding)

  # It's a console.
  return WinUnicodeConsoleOutput(
      real_output_handle, old_fileno, stream.name, encoding)
307
308
def fix_win_console(encoding):
  """Makes Unicode console output work independently of the current code page.

  sys.stdout and sys.stderr are replaced by the streams returned by
  win_get_unicode_stream(). A failure while doing so is reported through
  complain() and is not propagated.

  This also fixes <http://bugs.python.org/issue1602>.
  Credit to Michael Kaplan
  <http://blogs.msdn.com/b/michkap/archive/2010/04/07/9989346.aspx> and
  TZOmegaTZIOY
  <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>.

  Returns:
    False if sys.stdout or sys.stderr is already a WinUnicodeOutputBase, True
    otherwise.
  """
  already_wrapped = any(
      isinstance(std_stream, WinUnicodeOutputBase)
      for std_stream in (sys.stdout, sys.stderr))
  if already_wrapped:
    return False

  # SetConsoleCP and SetConsoleOutputCP could be used to change the code page
  # but it's not really useful since the code here is using WriteConsoleW().
  # Also, changing the code page is 'permanent' to the console and needs to be
  # reverted manually.
  # In practice one needs to set the console font to a TTF font to be able to
  # see all the characters but it failed for me in practice. In any case, it
  # won't throw any exception when printing, which is the important part.
  try:
    # -11 and -12 are defined in stdio.h
    sys.stdout = win_get_unicode_stream(sys.stdout, 1, -11, encoding)
    sys.stderr = win_get_unicode_stream(sys.stderr, 2, -12, encoding)
    # TODO(maruel): Do sys.stdin with ReadConsoleW(). Albeit the limitation is
    # "It doesn't appear to be possible to read Unicode characters in UTF-8
    # mode" and this appears to be a limitation of cmd.exe.
  except Exception as e:
    complain('exception %r while fixing up sys.stdout and sys.stderr' % e)
  return True
339
340
def fix_encoding():
  """Fixes various encoding problems on all platforms.

  Should be called at the very beginning of the process.

  Returns:
    True only if every fix that was run reported True.
  """
  on_windows = sys.platform == 'win32'
  # Every fix is always run; the outcomes are only combined at the end.
  outcomes = []

  if on_windows:
    outcomes.append(fix_win_codec())

  outcomes.append(fix_default_encoding())

  if on_windows:
    encoding = sys.getdefaultencoding()
    outcomes.append(fix_win_sys_argv(encoding))
    outcomes.append(fix_win_console(encoding))
  return all(outcomes)
OLDNEW
« no previous file with comments | « PRESUBMIT.py ('k') | gcl.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698