| OLD | NEW |
| (Empty) |
| 1 # Copyright 2013 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 """Collection of functions and classes to fix various encoding problems on | |
| 6 multiple platforms with python. | |
| 7 """ | |
| 8 | |
| 9 import codecs | |
| 10 import locale | |
| 11 import os | |
| 12 import sys | |
| 13 | |
| 14 | |
# Set to True by fix_win_sys_argv() once sys.argv has been rewritten, so that
# later calls become no-ops.
_SYS_ARGV_PROCESSED = False
| 17 | |
| 18 | |
def complain(message):
    """Reports a diagnostic on the interpreter's original stderr.

    If any exception occurs in this file, we'll probably try to print it on
    stderr, which makes for frustrating debugging if stderr is directed to
    our wrapper. So the text is written to sys.__stderr__ instead, so that
    the user has a higher chance to see it.

    Args:
      message: what to report. A str is written as is (an empty str yields an
          empty line); any other object is rendered with repr().
    """
    stream = sys.__stderr__
    if stream is None:
        # The interpreter was started without a usable stderr; there is
        # nowhere to report to, and raising here would hide the real error.
        return
    text = message if isinstance(message, str) else repr(message)
    stream.write(text + '\n')
| 27 | |
| 28 | |
def fix_default_encoding():
    """Makes utf-8 the default encoding and asks for UTF-8 locales.

    By default the python execution environment is lazy and defaults to the
    ascii encoding.

    http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/

    Returns:
      False when the default encoding is already utf-8 (nothing is changed),
      True once the default encoding and the locale categories were updated.
    """
    if sys.getdefaultencoding() == 'utf-8':
        return False

    # Reloading sys regenerates setdefaultencoding.
    reload(sys)
    # Module 'sys' has no 'setdefaultencoding' member
    # pylint: disable=E1101
    sys.setdefaultencoding('utf-8')

    category_names = [name for name in dir(locale) if name.startswith('LC_')]
    for category_name in category_names:
        category = getattr(locale, category_name)
        # Start from the user's configured locale for this category.
        try:
            locale.setlocale(category, '')
        except locale.Error:
            continue
        try:
            language = locale.getlocale(category)[0]
        except (TypeError, ValueError):
            continue
        if not language:
            continue
        # Keep the language but switch the character set to UTF-8. When the
        # C library refuses, record the wish in the environment instead.
        try:
            locale.setlocale(category, (language, 'UTF-8'))
        except locale.Error:
            os.environ[category_name] = language + '.UTF-8'

    # Best effort: re-read the environment for all the categories at once.
    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error:
        pass
    return True
| 67 | |
| 68 | |
| 69 ############################### | |
| 70 # Windows specific | |
| 71 | |
| 72 | |
def fix_win_sys_argv(encoding):
    """Rebuilds sys.argv from the Unicode command line of the process.

    Works around <http://bugs.python.org/issue2128>.

    The interpreter executable and its own options are stripped while the
    arguments are still Unicode; every remaining argument is then encoded,
    so sys.argv ends up holding only 'encoding' encoded strings.

    Args:
      encoding: codec used to encode each argument, with the 'replace' error
          policy. utf-8 is recommended.

    Returns:
      True if sys.argv was replaced. False if a previous call already did it,
      or if the command line could not be parsed (sys.argv is then left
      untouched).
    """
    global _SYS_ARGV_PROCESSED
    if _SYS_ARGV_PROCESSED:
        return False

    # These types are available on linux but not Mac.
    # pylint: disable=E0611,F0401
    from ctypes import byref, c_int, POINTER, windll, WINFUNCTYPE
    from ctypes.wintypes import HANDLE, LPCWSTR, LPWSTR

    # <http://msdn.microsoft.com/en-us/library/ms683156.aspx>
    GetCommandLineW = WINFUNCTYPE(LPWSTR)(('GetCommandLineW', windll.kernel32))
    # <http://msdn.microsoft.com/en-us/library/bb776391.aspx>
    CommandLineToArgvW = WINFUNCTYPE(
        POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(
            ('CommandLineToArgvW', windll.shell32))
    LocalFree = WINFUNCTYPE(HANDLE, HANDLE)(('LocalFree', windll.kernel32))

    argc = c_int(0)
    argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))
    if not argv_unicode:
        # NULL means the command line could not be parsed; keep the sys.argv
        # the interpreter computed rather than replacing it with an empty one.
        return False
    try:
        # Indexing copies each argument into a Python unicode object, so the
        # native buffer can be released right after.
        args = [argv_unicode[i] for i in range(argc.value)]
    finally:
        # The array returned by CommandLineToArgvW is owned by the caller.
        LocalFree(argv_unicode)

    if not hasattr(sys, 'frozen'):
        # If this is an executable produced by py2exe or bbfreeze, then it
        # will have been invoked directly. Otherwise, args[0] is the Python
        # interpreter, so skip that.
        args = args[1:]

        # Also skip option arguments to the Python interpreter.
        while args:
            arg = args[0]
            if not arg.startswith(u'-') or arg == u'-':
                break
            args = args[1:]
            if arg == u'-m':
                # sys.argv[0] should really be the absolute path of the
                # module source, but never mind.
                break
            if arg == u'-c':
                # Replace the command string with '-c'; slice assignment also
                # copes with nothing following the option.
                args[:1] = [u'-c']
                break

    # Encode last so that the comparisons above never mix encoded and
    # Unicode strings, and so that every element has the same type.
    sys.argv = [arg.encode(encoding, 'replace') for arg in args]
    _SYS_ARGV_PROCESSED = True
    return True
| 123 | |
| 124 | |
def fix_win_codec():
    """Teaches the codec registry to resolve 'cp65001' as utf-8 if needed.

    Works around <http://bugs.python.org/issue6058>.

    Returns:
      False if 'cp65001' can already be looked up, True if a codec search
      function had to be registered for it.
    """
    def find_cp65001(codec_name):
        # <http://msdn.microsoft.com/en-us/library/dd317756.aspx>
        if codec_name == 'cp65001':
            return codecs.lookup('utf-8')
        return None

    try:
        codecs.lookup('cp65001')
    except LookupError:
        codecs.register(find_cp65001)
        return True
    return False
| 135 | |
| 136 | |
class WinUnicodeOutputBase(object):
    """Common part of the sys.stdout / sys.stderr replacements for Windows.

    Subclasses must implement flush() and write().

    Setting encoding to utf-8 is recommended.
    """

    def __init__(self, fileno, name, encoding):
        self.name = name
        self.encoding = encoding
        # Value handed back by fileno().
        self._fileno = fileno

        # Attributes exposed to look like a file object opened for writing.
        self.mode = 'w'
        self.softspace = False
        self.closed = False

    @staticmethod
    def isatty():
        """Always reports that the stream is not a tty."""
        return False

    def close(self):
        """Marks the object as closed without touching the handle."""
        # Don't really close the handle, that would only cause problems.
        self.closed = True

    def fileno(self):
        """Returns the file number given at construction."""
        return self._fileno

    def flush(self):
        """To be implemented by subclasses."""
        raise NotImplementedError()

    def write(self, text):
        """To be implemented by subclasses."""
        raise NotImplementedError()

    def writelines(self, lines):
        """Writes each item of 'lines' with write().

        Any exception is reported through complain() and then re-raised.
        """
        try:
            for line in lines:
                self.write(line)
        except Exception as error:
            complain('%s.writelines: %r' % (self.name, error))
            raise
| 177 | |
| 178 | |
class WinUnicodeConsoleOutput(WinUnicodeOutputBase):
    """Stream replacement that writes straight to a Windows console.

    Text is sent through the win32 WriteConsoleW() function.
    """

    def __init__(self, console_handle, fileno, stream_name, encoding):
        super(WinUnicodeConsoleOutput, self).__init__(
            fileno, '<Unicode console %s>' % stream_name, encoding)
        # Handle to use for WriteConsoleW
        self._console_handle = console_handle

        # Loads the necessary function.
        # These types are available on linux but not Mac.
        # pylint: disable=E0611,F0401
        from ctypes import byref, GetLastError, POINTER, windll, WINFUNCTYPE
        from ctypes.wintypes import BOOL, DWORD, HANDLE, LPWSTR
        from ctypes.wintypes import LPVOID  # pylint: disable=E0611

        # Kept on the instance because write() needs them on every call.
        self._DWORD = DWORD
        self._byref = byref
        self._GetLastError = GetLastError

        # <http://msdn.microsoft.com/en-us/library/ms687401.aspx>
        prototype = WINFUNCTYPE(
            BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID)
        self._WriteConsoleW = prototype(('WriteConsoleW', windll.kernel32))

    def flush(self):
        """Does nothing."""
        # No need to flush the console since it's immediate.
        pass

    def write(self, text):
        """Writes 'text' to the console, in chunks of at most 10000 characters.

        Anything that is not unicode is converted with str() and decoded
        using self.encoding first. Raises IOError when WriteConsoleW fails
        or writes nothing; any exception is reported through complain() and
        then re-raised.
        """
        try:
            if not isinstance(text, unicode):
                # Convert to unicode.
                text = str(text).decode(self.encoding, 'replace')
            pending = len(text)
            while pending > 0:
                written = self._DWORD(0)
                # There is a shorter-than-documented limitation on the length
                # of the string passed to WriteConsoleW. See
                # <http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232>.
                succeeded = self._WriteConsoleW(
                    self._console_handle, text, min(pending, 10000),
                    self._byref(written), None)
                if succeeded == 0 or written.value == 0:
                    details = (
                        succeeded, written.value, self._GetLastError())
                    raise IOError(
                        'WriteConsoleW returned %r, n.value = %r, last error = %r'
                        % details)
                pending -= written.value
                if pending:
                    # Drop what the console accepted and go around again.
                    text = text[written.value:]
        except Exception as error:
            complain('%s.write: %r' % (self.name, error))
            raise
| 236 | |
| 237 | |
class WinUnicodeOutput(WinUnicodeOutputBase):
    """Stream replacement wrapping a regular (non-console) output stream.

    Unicode text is encoded with self.encoding before being handed to the
    wrapped stream; characters that cannot be encoded are replaced instead of
    raising.
    """

    def __init__(self, stream, fileno, encoding):
        label = '<Unicode redirected %s>' % stream.name
        super(WinUnicodeOutput, self).__init__(fileno, label, encoding)
        # Output stream
        self._stream = stream

        # Flush right now.
        self.flush()

    def flush(self):
        """Flushes the wrapped stream.

        Any exception is reported through complain() and then re-raised.
        """
        try:
            self._stream.flush()
        except Exception as error:
            complain(
                '%s.flush: %r from %r' % (self.name, error, self._stream))
            raise

    def write(self, text):
        """Writes 'text' to the wrapped stream, encoding unicode first.

        Any exception is reported through complain() and then re-raised.
        """
        try:
            data = text
            if isinstance(data, unicode):
                # Replace characters that cannot be printed instead of
                # failing.
                data = data.encode(self.encoding, 'replace')
            self._stream.write(data)
        except Exception as error:
            complain('%s.write: %r' % (self.name, error))
            raise
| 269 | |
| 270 | |
def win_handle_is_a_console(handle):
    """Returns True if a Windows file handle is a handle to a console.

    Args:
      handle: value returned by GetStdHandle(): an integer handle, None for a
          NULL handle, or INVALID_HANDLE_VALUE.

    Returns:
      True only if the handle is a character device for which GetConsoleMode()
      succeeds; False otherwise, including for NULL and invalid handles.
    """
    from ctypes import c_void_p

    # A HANDLE is pointer-sized, so INVALID_HANDLE_VALUE is (void*)-1: it is
    # 0xFFFFFFFF only on 32-bit Windows. The 32-bit spelling is still
    # rejected everywhere, as it always was.
    invalid_handle_values = (c_void_p(-1).value, 0xFFFFFFFF)

    # GetStdHandle returns INVALID_HANDLE_VALUE, NULL, or a valid handle.
    if handle is None or handle in invalid_handle_values:
        return False

    # These types are available on linux but not Mac.
    # pylint: disable=E0611,F0401
    from ctypes import byref, POINTER, windll, WINFUNCTYPE
    from ctypes.wintypes import BOOL, DWORD, HANDLE

    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000

    # <http://msdn.microsoft.com/en-us/library/ms683167.aspx>
    GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))(
        ('GetConsoleMode', windll.kernel32))
    # <http://msdn.microsoft.com/en-us/library/aa364960.aspx>
    # The argument is declared as HANDLE so that it is not truncated to 32
    # bits on 64-bit Windows.
    GetFileType = WINFUNCTYPE(DWORD, HANDLE)(('GetFileType', windll.kernel32))

    if (GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR:
        return False
    # GetConsoleMode only succeeds on console handles; the mode itself is not
    # needed, hence the throw-away DWORD.
    return bool(GetConsoleMode(handle, byref(DWORD())))
| 294 | |
| 295 | |
def win_get_unicode_stream(stream, excepted_fileno, output_handle, encoding):
    """Wraps 'stream' into an object that can output unicode.

    A WinUnicodeConsoleOutput, which writes directly to the console, is
    returned only if:
    - stream.fileno() is the expected file number, and
    - the standard handle designated by 'output_handle' is in fact a handle
      to a console.

    In every other case a WinUnicodeOutput, which encodes the text and
    forwards it to 'stream', is returned.
    """
    # A stream without a fileno attribute is treated as having no file number.
    try:
        get_fileno = stream.fileno
    except AttributeError:
        current_fileno = None
    else:
        current_fileno = get_fileno()

    if current_fileno != excepted_fileno:
        # It's something else. Create an auto-encoding stream.
        return WinUnicodeOutput(stream, current_fileno, encoding)

    # These types are available on linux but not Mac.
    # pylint: disable=E0611,F0401
    from ctypes import windll, WINFUNCTYPE
    from ctypes.wintypes import DWORD, HANDLE

    # <http://msdn.microsoft.com/en-us/library/ms683231.aspx>
    GetStdHandle = WINFUNCTYPE(HANDLE, DWORD)(
        ('GetStdHandle', windll.kernel32))

    console_handle = GetStdHandle(DWORD(output_handle))
    if not win_handle_is_a_console(console_handle):
        # Redirected to a file or a pipe. Create an auto-encoding stream.
        return WinUnicodeOutput(stream, current_fileno, encoding)

    # It's a console.
    return WinUnicodeConsoleOutput(
        console_handle, current_fileno, stream.name, encoding)
| 322 | |
| 323 | |
def fix_win_console(encoding):
    """Makes Unicode console output work independently of the current code page.

    This also fixes <http://bugs.python.org/issue1602>.
    Credit to Michael Kaplan
    <http://blogs.msdn.com/b/michkap/archive/2010/04/07/9989346.aspx> and
    TZOmegaTZIOY
    <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>.

    Returns:
      False if sys.stdout or sys.stderr is already one of the wrappers from
      this file, True otherwise. A failure while installing the wrappers is
      reported through complain() and still yields True.
    """
    if any(isinstance(stream, WinUnicodeOutputBase)
           for stream in (sys.stdout, sys.stderr)):
        return False

    # Identifiers accepted by GetStdHandle() for the standard output and the
    # standard error handles.
    std_output_handle = -11
    std_error_handle = -12

    try:
        # SetConsoleCP and SetConsoleOutputCP could be used to change the code
        # page but it's not really useful since the code here is using
        # WriteConsoleW(). Also, changing the code page is 'permanent' to the
        # console and needs to be reverted manually.
        # In practice one needs to set the console font to a TTF font to be
        # able to see all the characters but it failed for me in practice. In
        # any case, it won't throw any exception when printing, which is the
        # important part.
        sys.stdout = win_get_unicode_stream(
            sys.stdout, 1, std_output_handle, encoding)
        sys.stderr = win_get_unicode_stream(
            sys.stderr, 2, std_error_handle, encoding)
        # TODO(maruel): Do sys.stdin with ReadConsoleW(). Albeit the limitation
        # is "It doesn't appear to be possible to read Unicode characters in
        # UTF-8 mode" and this appears to be a limitation of cmd.exe.
    except Exception as error:
        complain(
            'exception %r while fixing up sys.stdout and sys.stderr' % error)
    return True
| 354 | |
| 355 | |
def fix_encoding():
    """Fixes various encoding problems on all platforms.

    Should be called at the very beginning of the process.

    Returns:
      True only if every fix that was attempted reported True.
    """
    on_windows = sys.platform == 'win32'
    outcomes = []

    if on_windows:
        outcomes.append(fix_win_codec())

    outcomes.append(fix_default_encoding())

    if on_windows:
        # Every fix is run even when an earlier one reported False.
        encoding = sys.getdefaultencoding()
        outcomes.append(fix_win_sys_argv(encoding))
        outcomes.append(fix_win_console(encoding))
    return all(outcomes)
| OLD | NEW |