third_party/google-endpoints/future/utils/surrogateescape.py - Issue 2666783008: Add google-endpoints to third_party/.

Side by Side Diff: third_party/google-endpoints/future/utils/surrogateescape.py

Issue 2666783008: Add google-endpoints to third_party/. (Closed)

Patch Set: Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 """

	2 This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error

	3 handler of Python 3.

	4

	5 Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc

	6 """

	7

	8 # This code is released under the Python license and the BSD 2-clause license

	9

	10 import codecs

	11 import sys

	12

	13 from future import utils

	14

	15

	# Name under which the error handler defined in this module is looked up
	# and registered with the codecs machinery.
	FS_ERRORS = 'surrogateescape'

	# -- Python 2/3 compatibility -------------------------------------
	# Alternative handler name kept for reference (disabled):
	# FS_ERRORS = 'my_surrogateescape'

	20

	def u(text):
	    """Return *text* as a text (unicode) string.

	    When ``utils.PY3`` is true the argument is returned unchanged;
	    otherwise it is decoded with the 'unicode_escape' codec.
	    """
	    return text if utils.PY3 else text.decode('unicode_escape')

	26

	def b(data):
	    """Return *data* as a byte string.

	    When ``utils.PY3`` is true the argument is encoded with 'latin1';
	    otherwise it is returned unchanged.
	    """
	    return data.encode('latin1') if utils.PY3 else data

	32

	# Pick the "code point -> 1-char text" and "byte value -> 1-byte string"
	# constructors that match the running interpreter.
	if not utils.PY3:
	    _unichr = unichr
	    bytes_chr = chr
	else:
	    _unichr = chr

	    def bytes_chr(code):
	        """Return a length-1 bytes object holding the byte value *code*."""
	        return bytes((code,))

	39

	def surrogateescape_handler(exc):
	    """
	    Pure Python implementation of the PEP 383 "surrogateescape" error
	    handler of Python 3.

	    On decoding, undecodable bytes are replaced by a Unicode character
	    U+DCxx; on encoding, those characters are translated back.

	    ``exc`` is the UnicodeDecodeError / UnicodeEncodeError being handled.
	    Returns the ``(replacement, exc.end)`` tuple expected from a codecs
	    error handler. ``exc`` itself is re-raised when it is neither of those
	    two exception types or when the offending slice cannot be converted.
	    """
	    # The slice is taken before the type check, exactly as upstream does.
	    offending = exc.object[exc.start:exc.end]

	    if isinstance(exc, UnicodeDecodeError):
	        # ``offending`` is a byte string in this case.
	        convert = replace_surrogate_decode
	    elif isinstance(exc, UnicodeEncodeError):
	        # In the case of u'\udcc3'.encode('ascii',
	        # 'this_surrogateescape_handler'), both Python 2.x and 3.x raise an
	        # exception anyway after this function is called, even though I
	        # think it's doing what it should. It seems that the strict encoder
	        # is called to encode the unicode string that this function
	        # returns ...
	        convert = replace_surrogate_encode
	    else:
	        raise exc

	    try:
	        replacement = convert(offending)
	    except NotASurrogateError:
	        raise exc
	    return (replacement, exc.end)

	65

	66

	class NotASurrogateError(Exception):
	    """Signals a code unit that the surrogate replacement cannot map."""

	69

	70

	def replace_surrogate_encode(mystring):
	    """
	    Translate escaped characters back to the low code points they stand for.

	    Every character of ``mystring`` must lie in U+DC00..U+DCFF; it is
	    replaced by the character whose ordinal is ``code - 0xDC00``.

	    Returns a (unicode) string, not the more logical bytes, because the
	    codecs register_error functionality expects this.

	    Raises NotASurrogateError if any character lies outside U+DC00..U+DCFF,
	    so that the caller can fail with the original exception.
	    """
	    decoded = []
	    for ch in mystring:
	        code = ord(ch)
	        # Reject anything that cannot be mapped back. This must be
	        # NotASurrogateError: there is no ``exc`` in this scope to re-raise,
	        # and code points below U+DC00 (e.g. U+D800..U+DBFF) would otherwise
	        # reach _unichr() with a negative argument.
	        # NOTE(review): PEP 383 only produces U+DC80..U+DCFF; the lower half
	        # U+DC00..U+DC7F is still accepted here, as in the original code.
	        if not 0xDC00 <= code <= 0xDCFF:
	            raise NotASurrogateError
	        decoded.append(_unichr(code - 0xDC00))
	    return str().join(decoded)

	98

	99

	def replace_surrogate_decode(mybytes):
	    """
	    Returns a (unicode) string built from the items of ``mybytes``.

	    Values up to 0x7F become the character with the same ordinal and
	    values 0x80..0xFF become U+DC80..U+DCFF. Any larger value raises
	    NotASurrogateError.
	    """
	    chars = []
	    for item in mybytes:
	        # We may be walking newbytes (items are ints) or a native str on
	        # Py2 (items are 1-character strings).
	        value = item if isinstance(item, int) else ord(item)
	        if value <= 0x7F:
	            chars.append(_unichr(value))
	        elif value <= 0xFF:
	            chars.append(_unichr(0xDC00 + value))
	        else:
	            raise NotASurrogateError
	    return str().join(chars)

	123

	124

	def encodefilename(fn):
	    """
	    Encode the text filename ``fn`` to bytes according to FS_ENCODING.

	    For 'ascii' and 'utf-8' the characters are encoded one by one, turning
	    U+DC80..U+DCFF back into the bytes 0x80..0xFF; characters that cannot
	    be represented raise UnicodeEncodeError. Any other encoding is handed
	    to ``fn.encode`` with the FS_ERRORS error handler.
	    """
	    if FS_ENCODING == 'ascii':
	        # ASCII encoder of Python 2 expects that the error handler returns a
	        # Unicode string encodable to ASCII, whereas our surrogateescape
	        # error handler has to return bytes in 0x80-0xFF range.
	        out = []
	        for pos, char in enumerate(fn):
	            point = ord(char)
	            if point < 128:
	                piece = bytes_chr(point)
	            elif 0xDC80 <= point <= 0xDCFF:
	                piece = bytes_chr(point - 0xDC00)
	            else:
	                raise UnicodeEncodeError(
	                    FS_ENCODING, fn, pos, pos + 1,
	                    'ordinal not in range(128)')
	            out.append(piece)
	        return bytes().join(out)

	    if FS_ENCODING == 'utf-8':
	        # UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF
	        # doesn't go through our error handler.
	        out = []
	        for pos, char in enumerate(fn):
	            point = ord(char)
	            if not 0xD800 <= point <= 0xDFFF:
	                out.append(char.encode('utf-8'))
	            elif 0xDC80 <= point <= 0xDCFF:
	                out.append(bytes_chr(point - 0xDC00))
	            else:
	                raise UnicodeEncodeError(
	                    FS_ENCODING, fn, pos, pos + 1,
	                    'surrogates not allowed')
	        return bytes().join(out)

	    return fn.encode(FS_ENCODING, FS_ERRORS)

	163

	def decodefilename(fn):
	    """Decode the byte filename ``fn`` with FS_ENCODING and FS_ERRORS."""
	    decoded_name = fn.decode(FS_ENCODING, FS_ERRORS)
	    return decoded_name

	166

	# Active configuration: the filesystem encoding plus a sample raw filename
	# and the text it is expected to decode to.
	FS_ENCODING = 'ascii'
	fn = b('[abc\xff]')
	encoded = u('[abc\udcff]')
	# Alternative configurations (disabled):
	# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
	# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')


	# Normalize the filesystem encoding name.
	# For example, we expect "utf-8", not "UTF8".
	FS_ENCODING = codecs.lookup(FS_ENCODING).name

	175

	176

	def register_surrogateescape():
	    """
	    Registers the surrogateescape error handler on Python 2 (only).

	    Does nothing when ``utils.PY3`` is true, or when a handler is already
	    registered under the FS_ERRORS name.
	    """
	    if not utils.PY3:
	        try:
	            codecs.lookup_error(FS_ERRORS)
	        except LookupError:
	            # No handler with that name yet: install ours.
	            codecs.register_error(FS_ERRORS, surrogateescape_handler)

	187

	188

	if __name__ == '__main__':
	    # Running the module as a script does nothing. The self-test below is
	    # kept for reference but is disabled:
	    #
	    #     register_surrogateescape()
	    #
	    #     b = decodefilename(fn)
	    #     assert b == encoded, "%r != %r" % (b, encoded)
	    #     c = encodefilename(b)
	    #     assert c == fn, '%r != %r' % (c, fn)
	    #     # print("ok")
	    pass

	198 # # print("ok")

	199

	200

OLD	NEW

« no previous file with comments | « third_party/google-endpoints/future/utils/__init__.py ('k') | third_party/google-endpoints/google/api/__init__.py » ('j') | no next file with comments »