OLD | NEW |
(Empty) | |
| 1 """ |
| 2 Ported using Python-Future from the Python 3.3 standard library. |
| 3 |
| 4 Parse (absolute and relative) URLs. |
| 5 |
| 6 urlparse module is based upon the following RFC specifications. |
| 7 |
| 8 RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding |
| 9 and L. Masinter, January 2005. |
| 10 |
| 11 RFC 2732 : "Format for Literal IPv6 Addresses in URL's" by R.Hinden, B.Carpenter |
| 12 and L.Masinter, December 1999. |
| 13 |
| 14 RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T. |
| 15 Berners-Lee, R. Fielding, and L. Masinter, August 1998. |
| 16 |
| 17 RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July
1998. |
| 18 |
| 19 RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June |
| 20 1995. |
| 21 |
| 22 RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. |
| 23 McCahill, December 1994 |
| 24 |
| 25 RFC 3986 is considered the current standard and any future changes to |
| 26 urlparse module should conform with it. The urlparse module is |
| 27 currently not entirely compliant with this RFC due to de facto |
| 28 scenarios for parsing, and for backward compatibility purposes, some |
| 29 parsing quirks from older RFCs are retained. The testcases in |
| 30 test_urlparse.py provides a good indicator of parsing behavior. |
| 31 """ |
| 32 from __future__ import absolute_import, division, unicode_literals |
| 33 from future.builtins import bytes, chr, dict, int, range, str |
| 34 from future.utils import raise_with_traceback |
| 35 |
| 36 import re |
| 37 import sys |
| 38 import collections |
| 39 |
# Public API of this module.
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
           "urlsplit", "urlunsplit", "urlencode", "parse_qs",
           "parse_qsl", "quote", "quote_plus", "quote_from_bytes",
           "unquote", "unquote_plus", "unquote_to_bytes"]

# A classification of schemes ('' means apply by default)
# Schemes whose URLs may be relative to a base URL (used by urljoin()).
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
                 'wais', 'file', 'https', 'shttp', 'mms',
                 'prospero', 'rtsp', 'rtspu', '', 'sftp',
                 'svn', 'svn+ssh']
# Schemes that use a //netloc authority component.
uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
               'imap', 'wais', 'file', 'mms', 'https', 'shttp',
               'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
               'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh']
# Schemes whose path may carry ;parameters (split out by urlparse()).
uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
               'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
               'mms', '', 'sftp', 'tel']

# These are not actually used anymore, but should stay for backwards
# compatibility.  (They are undocumented, but have a public-looking name.)
non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
                    'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
              'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
                 'nntp', 'wais', 'https', 'shttp', 'snews',
                 'file', 'prospero', '']

# Characters valid in scheme names
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                '0123456789'
                '+-.')
| 73 |
# XXX: Consider replacing with functools.lru_cache
MAX_CACHE_SIZE = 20
# Cache of urlsplit() results, keyed by
# (url, scheme, allow_fragments, type(url), type(scheme)).
_parse_cache = {}

def clear_cache():
    """Clear the parse cache and the quoters cache."""
    _parse_cache.clear()
    # _safe_quoters is the Quoter cache defined further down the module.
    _safe_quoters.clear()
| 82 |
| 83 |
| 84 # Helpers for bytes handling |
| 85 # For 3.2, we deliberately require applications that |
| 86 # handle improperly quoted URLs to do their own |
| 87 # decoding and encoding. If valid use cases are |
| 88 # presented, we may relax this by using latin-1 |
| 89 # decoding internally for 3.3 |
| 90 _implicit_encoding = 'ascii' |
| 91 _implicit_errors = 'strict' |
| 92 |
| 93 def _noop(obj): |
| 94 return obj |
| 95 |
| 96 def _encode_result(obj, encoding=_implicit_encoding, |
| 97 errors=_implicit_errors): |
| 98 return obj.encode(encoding, errors) |
| 99 |
| 100 def _decode_args(args, encoding=_implicit_encoding, |
| 101 errors=_implicit_errors): |
| 102 return tuple(x.decode(encoding, errors) if x else '' for x in args) |
| 103 |
def _coerce_args(*args):
    """Coerce all arguments to str and return them plus a result-coercion
    function.

    If the first argument is a str, all arguments pass through unchanged
    and the coercion function is a no-op.  Otherwise the arguments are
    decoded to str and the coercion function re-encodes results to bytes.
    Mixing str and non-str arguments raises TypeError, except that empty
    strings are always allowed (they appear as defaults like scheme='').
    """
    str_input = isinstance(args[0], str)
    for extra in args[1:]:
        # Empty values are exempt from the type check so that str
        # defaults work even when the caller passes bytes.
        if extra and isinstance(extra, str) != str_input:
            raise TypeError("Cannot mix str and non-str arguments")
    if str_input:
        return args + (_noop,)
    return _decode_args(args) + (_encode_result,)
| 119 |
# Result objects are more helpful than simple tuples
class _ResultMixinStr(object):
    """Standard approach to encoding parsed results from str to bytes"""
    __slots__ = ()

    def encode(self, encoding='ascii', errors='strict'):
        # Encode each tuple component and rebuild using the bytes
        # counterpart class installed by _fix_result_transcoding().
        return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self))
| 128 |
class _ResultMixinBytes(object):
    """Standard approach to decoding parsed results from bytes to str"""
    __slots__ = ()

    def decode(self, encoding='ascii', errors='strict'):
        # Decode each tuple component and rebuild using the str
        # counterpart class installed by _fix_result_transcoding().
        return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self))
| 135 |
| 136 |
class _NetlocResultMixinBase(object):
    """Shared methods for the parsed result objects containing a netloc element"""
    __slots__ = ()

    @property
    def username(self):
        # _userinfo is (username, password) parsed from netloc by the
        # str/bytes-specific subclasses.
        return self._userinfo[0]

    @property
    def password(self):
        return self._userinfo[1]

    @property
    def hostname(self):
        hostname = self._hostinfo[0]
        if not hostname:
            # Normalise an empty host to None.
            hostname = None
        elif hostname is not None:
            # Host names are case-insensitive; lowercase for comparison.
            hostname = hostname.lower()
        return hostname

    @property
    def port(self):
        port = self._hostinfo[1]
        if port is not None:
            # Non-numeric port text raises ValueError here (from int()).
            port = int(port, 10)
            # Return None on an illegal port
            if not ( 0 <= port <= 65535):
                return None
        return port
| 167 |
| 168 |
class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
    __slots__ = ()

    @property
    def _userinfo(self):
        # Split on the *last* '@' so that user info may itself contain
        # '@' characters.
        netloc = self.netloc
        userinfo, have_info, hostinfo = netloc.rpartition('@')
        if have_info:
            username, have_password, password = userinfo.partition(':')
            if not have_password:
                # 'user@host' with no ':' means no password at all.
                password = None
        else:
            username = password = None
        return username, password

    @property
    def _hostinfo(self):
        netloc = self.netloc
        _, _, hostinfo = netloc.rpartition('@')
        # A '[' marks an IPv6 literal; the port (if any) follows the
        # closing ']'.
        _, have_open_br, bracketed = hostinfo.partition('[')
        if have_open_br:
            hostname, _, port = bracketed.partition(']')
            _, have_port, port = port.partition(':')
        else:
            hostname, have_port, port = hostinfo.partition(':')
        if not have_port:
            port = None
        return hostname, port
| 197 |
| 198 |
class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
    __slots__ = ()

    @property
    def _userinfo(self):
        # bytes twin of _NetlocResultMixinStr._userinfo: split on the
        # last b'@' so user info may contain '@'.
        netloc = self.netloc
        userinfo, have_info, hostinfo = netloc.rpartition(b'@')
        if have_info:
            username, have_password, password = userinfo.partition(b':')
            if not have_password:
                password = None
        else:
            username = password = None
        return username, password

    @property
    def _hostinfo(self):
        netloc = self.netloc
        _, _, hostinfo = netloc.rpartition(b'@')
        # b'[' marks an IPv6 literal; the port (if any) follows b']'.
        _, have_open_br, bracketed = hostinfo.partition(b'[')
        if have_open_br:
            hostname, _, port = bracketed.partition(b']')
            _, have_port, port = port.partition(b':')
        else:
            hostname, have_port, port = hostinfo.partition(b':')
        if not have_port:
            port = None
        return hostname, port
| 227 |
| 228 |
from collections import namedtuple

# Plain namedtuple bases; the public result classes below mix in the
# str/bytes helper behaviour defined above.
_DefragResultBase = namedtuple('DefragResult', 'url fragment')
_SplitResultBase = namedtuple('SplitResult', 'scheme netloc path query fragment')
_ParseResultBase = namedtuple('ParseResult', 'scheme netloc path params query fragment')

# For backwards compatibility, alias _NetlocResultMixinStr
# ResultBase is no longer part of the documented API, but it is
# retained since deprecating it isn't worth the hassle
ResultBase = _NetlocResultMixinStr
| 239 |
# Structured result objects for string data
class DefragResult(_DefragResultBase, _ResultMixinStr):
    """Defragmented str URL together with the fragment that was removed."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the original URL, re-attaching any fragment."""
        return self.url + '#' + self.fragment if self.fragment else self.url
| 248 |
class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
    __slots__ = ()
    def geturl(self):
        # Recombine the five components back into a URL string.
        return urlunsplit(self)
| 253 |
class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
    __slots__ = ()
    def geturl(self):
        # Recombine the six components back into a URL string.
        return urlunparse(self)
| 258 |
# Structured result objects for bytes data
class DefragResultBytes(_DefragResultBase, _ResultMixinBytes):
    """Defragmented bytes URL together with the fragment that was removed."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the original URL, re-attaching any fragment."""
        return self.url + b'#' + self.fragment if self.fragment else self.url
| 267 |
class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
    __slots__ = ()
    def geturl(self):
        # Recombine the five components back into a bytes URL.
        return urlunsplit(self)
| 272 |
class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
    __slots__ = ()
    def geturl(self):
        # Recombine the six components back into a bytes URL.
        return urlunparse(self)
| 277 |
# Set up the encode/decode result pairs
def _fix_result_transcoding():
    """Link each str result class to its bytes counterpart (and back) so
    that .encode()/.decode() on results can rebuild the other flavour."""
    pairs = [
        (DefragResult, DefragResultBytes),
        (SplitResult, SplitResultBytes),
        (ParseResult, ParseResultBytes),
    ]
    for str_class, bytes_class in pairs:
        str_class._encoded_counterpart = bytes_class
        bytes_class._decoded_counterpart = str_class

_fix_result_transcoding()
del _fix_result_transcoding
| 291 |
def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    parts = urlsplit(url, scheme, allow_fragments)
    scheme, netloc, url, query, fragment = parts
    # Only schemes known to use ;parameters get them split off the path.
    if scheme in uses_params and ';' in url:
        url, params = _splitparams(url)
    else:
        params = ''
    return _coerce_result(
        ParseResult(scheme, netloc, url, params, query, fragment))
| 307 |
| 308 def _splitparams(url): |
| 309 if '/' in url: |
| 310 i = url.find(';', url.rfind('/')) |
| 311 if i < 0: |
| 312 return url, '' |
| 313 else: |
| 314 i = url.find(';') |
| 315 return url[:i], url[i+1:] |
| 316 |
| 317 def _splitnetloc(url, start=0): |
| 318 delim = len(url) # position of end of domain part of url, default is end |
| 319 for c in '/?#': # look for delimiters; the order is NOT important |
| 320 wdelim = url.find(c, start) # find first of this delim |
| 321 if wdelim >= 0: # if found |
| 322 delim = min(delim, wdelim) # use earliest delim position |
| 323 return url[start:delim], url[delim:] # return (domain, rest) |
| 324 |
def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    # The cache key includes the argument types so str and bytes inputs
    # that coerce to equal str values do not share cache entries.
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return _coerce_result(cached)
    if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
        clear_cache()
    netloc = query = fragment = ''
    i = url.find(':')
    if i > 0:
        if url[:i] == 'http': # optimize the common case
            scheme = url[:i].lower()
            url = url[i+1:]
            if url[:2] == '//':
                netloc, url = _splitnetloc(url, 2)
                # An unmatched bracket means a malformed IPv6 literal.
                if (('[' in netloc and ']' not in netloc) or
                    (']' in netloc and '[' not in netloc)):
                    raise ValueError("Invalid IPv6 URL")
            if allow_fragments and '#' in url:
                url, fragment = url.split('#', 1)
            if '?' in url:
                url, query = url.split('?', 1)
            v = SplitResult(scheme, netloc, url, query, fragment)
            _parse_cache[key] = v
            return _coerce_result(v)
        # Only treat url[:i] as a scheme if it uses valid scheme chars.
        for c in url[:i]:
            if c not in scheme_chars:
                break
        else:
            # make sure "url" is not actually a port number (in which case
            # "scheme" is really part of the path)
            rest = url[i+1:]
            if not rest or any(c not in '0123456789' for c in rest):
                # not a port number
                scheme, url = url[:i].lower(), rest

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        # An unmatched bracket means a malformed IPv6 literal.
        if (('[' in netloc and ']' not in netloc) or
            (']' in netloc and '[' not in netloc)):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    v = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = v
    return _coerce_result(v)
| 380 |
def urlunparse(components):
    """Put a parsed URL back together again.

    This may result in a slightly different, but equivalent URL, if the
    URL that was parsed originally had redundant delimiters, e.g. a ?
    with an empty query (the draft states that these are equivalent).
    """
    scheme, netloc, url, params, query, fragment, _coerce_result = (
        _coerce_args(*components))
    # Re-attach ;params to the path, then delegate to urlunsplit().
    if params:
        url = url + ';' + params
    return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment)))
| 391 |
def urlunsplit(components):
    """Combine the elements of a tuple as returned by urlsplit() into a
    complete URL as a string.  The data argument can be any five-item
    iterable.  This may result in a slightly different, but equivalent
    URL, if the URL that was parsed originally had unnecessary delimiters
    (for example, a ? with an empty query; the RFC states that these are
    equivalent)."""
    scheme, netloc, url, query, fragment, _coerce_result = (
        _coerce_args(*components))
    if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
        # Ensure a path that follows an authority starts with '/'.
        if url and url[:1] != '/':
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url = url + '?' + query
    if fragment:
        url = url + '#' + fragment
    return _coerce_result(url)
| 410 |
def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter."""
    if not base:
        return url
    if not url:
        return base
    base, url, _coerce_result = _coerce_args(base, url)
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
        urlparse(base, '', allow_fragments)
    scheme, netloc, path, params, query, fragment = \
        urlparse(url, bscheme, allow_fragments)
    # Different (or non-relative) scheme: url stands on its own.
    if scheme != bscheme or scheme not in uses_relative:
        return _coerce_result(url)
    if scheme in uses_netloc:
        # url has its own authority: it is absolute apart from the scheme.
        if netloc:
            return _coerce_result(urlunparse((scheme, netloc, path,
                                              params, query, fragment)))
        netloc = bnetloc
    # Absolute path: keep it, inherit only scheme/netloc from the base.
    if path[:1] == '/':
        return _coerce_result(urlunparse((scheme, netloc, path,
                                          params, query, fragment)))
    if not path and not params:
        # Empty relative reference: inherit path/params (and query if
        # the reference has none) from the base.
        path = bpath
        params = bparams
        if not query:
            query = bquery
        return _coerce_result(urlunparse((scheme, netloc, path,
                                          params, query, fragment)))
    # Merge the base path (minus its last segment) with the relative path.
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    if segments[-1] == '.':
        segments[-1] = ''
    while '.' in segments:
        segments.remove('.')
    # Repeatedly collapse the first resolvable '..' with its predecessor.
    while 1:
        i = 1
        n = len(segments) - 1
        while i < n:
            if (segments[i] == '..'
                and segments[i-1] not in ('', '..')):
                del segments[i-1:i+1]
                break
            i = i+1
        else:
            break
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return _coerce_result(urlunparse((scheme, netloc, '/'.join(segments),
                                      params, query, fragment)))
| 463 |
def urldefrag(url):
    """Removes any existing fragment from URL.

    Returns a tuple of the defragmented URL and the fragment.  If
    the URL contained no fragments, the second element is the
    empty string.
    """
    url, _coerce_result = _coerce_args(url)
    if '#' not in url:
        # Fast path: nothing to strip.
        return _coerce_result(DefragResult(url, ''))
    s, n, p, a, q, frag = urlparse(url)
    defrag = urlunparse((s, n, p, a, q, ''))
    return _coerce_result(DefragResult(defrag, frag))
| 479 |
_hexdig = '0123456789ABCDEFabcdef'
# Map every two-hex-digit bytes pair (both cases) to the byte it encodes,
# e.g. b'20' -> b' '.
_hextobyte = dict(((a + b).encode(), bytes([int(a + b, 16)]))
                  for a in _hexdig for b in _hexdig)

def unquote_to_bytes(string):
    """unquote_to_bytes('abc%20def') -> b'abc def'."""
    # Note: strings are encoded as UTF-8. This is only an issue if it contains
    # unescaped non-ASCII characters, which URIs should not.
    if not string:
        # Is it a string-like object?  (Raises AttributeError for
        # non-string input, matching the behaviour below.)
        string.split
        return bytes(b'')
    if isinstance(string, str):
        string = string.encode('utf-8')
    ### For Python-Future:
    # It is already a byte-string object, but force it to be newbytes here on
    # Py2:
    string = bytes(string)
    ###
    chunks = string.split(b'%')
    if len(chunks) == 1:
        # No percent escapes present.
        return string
    decoded = [chunks[0]]
    for chunk in chunks[1:]:
        try:
            decoded.append(_hextobyte[chunk[:2]])
            decoded.append(chunk[2:])
        except KeyError:
            # Not a valid two-digit escape: keep the '%' literally.
            decoded.append(b'%')
            decoded.append(chunk)
    return bytes(b'').join(decoded)
| 512 |
# Matches maximal runs of ASCII characters; used to isolate the parts of
# a string where %xx escapes can legally occur.
_asciire = re.compile('([\x00-\x7f]+)')

def unquote(string, encoding='utf-8', errors='replace'):
    """Replace %xx escapes by their single-character equivalent. The optional
    encoding and errors parameters specify how to decode percent-encoded
    sequences into Unicode characters, as accepted by the bytes.decode()
    method.
    By default, percent-encoded sequences are decoded with UTF-8, and invalid
    sequences are replaced by a placeholder character.

    unquote('abc%20def') -> 'abc def'.
    """
    if '%' not in string:
        # Probe for string-likeness, mirroring unquote_to_bytes().
        string.split
        return string
    if encoding is None:
        encoding = 'utf-8'
    if errors is None:
        errors = 'replace'
    # re.split with a capturing group alternates non-ASCII / ASCII runs;
    # only the ASCII runs (odd indices) can contain %xx escapes.
    pieces = _asciire.split(string)
    decoded = [pieces[0]]
    for idx in range(1, len(pieces), 2):
        decoded.append(unquote_to_bytes(pieces[idx]).decode(encoding, errors))
        decoded.append(pieces[idx + 1])
    return ''.join(decoded)
| 539 |
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
             encoding='utf-8', errors='replace'):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.
        A true value indicates that blanks should be retained as
        blank strings.  The default false value indicates that
        blank values are to be ignored and treated as if they were
        not included.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    encoding and errors: specify how to decode percent-encoded sequences
        into Unicode characters, as accepted by the bytes.decode() method.

    Returns a dict mapping each name to the list of its values.
    """
    parsed_result = {}
    # Group the (name, value) pairs produced by parse_qsl() by name.
    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
                                 encoding=encoding, errors=errors):
        parsed_result.setdefault(name, []).append(value)
    return parsed_result
| 571 |
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
              encoding='utf-8', errors='replace'):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were  not included.

    strict_parsing: flag indicating what to do with parsing errors. If
        false (the default), errors are silently ignored. If true,
        errors raise a ValueError exception.

    encoding and errors: specify how to decode percent-encoded sequences
        into Unicode characters, as accepted by the bytes.decode() method.

    Returns a list, as G-d intended.
    """
    qs, _coerce_result = _coerce_args(qs)
    # Both '&' and ';' are accepted as pair separators.
    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
    r = []
    for name_value in pairs:
        if not name_value and not strict_parsing:
            continue
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (name_value,))
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if len(nv[1]) or keep_blank_values:
            # '+' means a space in form encoding; restore before unquoting.
            name = nv[0].replace('+', ' ')
            name = unquote(name, encoding=encoding, errors=errors)
            name = _coerce_result(name)
            value = nv[1].replace('+', ' ')
            value = unquote(value, encoding=encoding, errors=errors)
            value = _coerce_result(value)
            r.append((name, value))
    return r
| 619 |
def unquote_plus(string, encoding='utf-8', errors='replace'):
    """Like unquote(), but also replace plus signs by spaces, as required for
    unquoting HTML form values.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    # '+' must become ' ' before %xx decoding so escaped plus signs
    # (%2B) survive unchanged.
    return unquote(string.replace('+', ' '), encoding, errors)
| 628 |
# Byte values that are never percent-encoded: ASCII letters, digits,
# and the characters '_', '.', '-'.
_ALWAYS_SAFE = frozenset(bytes(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                               b'abcdefghijklmnopqrstuvwxyz'
                               b'0123456789'
                               b'_.-'))
# The same set as a bytes object, for use with bytes.rstrip() below.
_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
# Cache of Quoter.__getitem__ callables keyed by their 'safe' bytes;
# cleared by clear_cache().
_safe_quoters = {}
| 635 |
class Quoter(collections.defaultdict):
    """A mapping from bytes (in range(0,256)) to strings.

    String values are percent-encoded byte values, unless the key < 128, and
    in the "safe" set (either the specified safe set, or default set).
    """
    # Keeps a cache internally, using defaultdict, for efficiency (lookups
    # of cached keys don't call Python code at all).
    def __init__(self, safe):
        """safe: bytes object."""
        # Bytes that pass through unquoted: the always-safe set plus the
        # caller-supplied extras.
        self.safe = _ALWAYS_SAFE.union(bytes(safe))

    def __repr__(self):
        # Without this, will just display as a defaultdict
        return "<Quoter %r>" % dict(self)

    def __missing__(self, b):
        # Handle a cache miss. Store quoted string in cache and return.
        res = chr(b) if b in self.safe else '%{0:02X}'.format(b)
        self[b] = res
        return res
| 657 |
def quote(string, safe='/', encoding=None, errors=None):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    string and safe may be either str or bytes objects. encoding must
    not be specified if string is a str.

    The optional encoding and errors parameters specify how to deal with
    non-ASCII characters, as accepted by the str.encode method.
    By default, encoding='utf-8' (characters are encoded with UTF-8), and
    errors='strict' (unsupported characters raise a UnicodeEncodeError).
    """
    if isinstance(string, str):
        if not string:
            return string
        # str input: fill in codec defaults, then work on bytes.
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'strict'
        string = string.encode(encoding, errors)
    else:
        # bytes input: codec parameters are meaningless and rejected.
        if encoding is not None:
            raise TypeError("quote() doesn't support 'encoding' for bytes")
        if errors is not None:
            raise TypeError("quote() doesn't support 'errors' for bytes")
    return quote_from_bytes(string, safe)
| 701 |
def quote_plus(string, safe='', encoding=None, errors=None):
    """Like quote(), but also replace ' ' with '+', as required for quoting
    HTML form values.  Plus signs in the original string are escaped unless
    they are included in safe.  It also does not have safe default to '/'.
    """
    # If there are no spaces (checking str and bytes forms), the regular
    # quote() already produces the right answer.
    if ((isinstance(string, str) and ' ' not in string) or
        (isinstance(string, bytes) and b' ' not in string)):
        return quote(string, safe, encoding, errors)
    # Temporarily mark space as safe so quote() leaves it alone, then
    # turn every remaining space into '+'.
    space = str(' ') if isinstance(safe, str) else bytes(b' ')
    return quote(string, safe + space, encoding, errors).replace(' ', '+')
| 718 |
def quote_from_bytes(bs, safe='/'):
    """Like quote(), but accepts a bytes object rather than a str, and does
    not perform string-to-bytes encoding.  It always returns an ASCII string.
    quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3f'
    """
    if not isinstance(bs, (bytes, bytearray)):
        raise TypeError("quote_from_bytes() expected bytes")
    if not bs:
        return str('')
    ### For Python-Future:
    bs = bytes(bs)
    ###
    if isinstance(safe, str):
        # Normalize 'safe' by converting to bytes and removing non-ASCII chars
        safe = str(safe).encode('ascii', 'ignore')
    else:
        ### For Python-Future:
        safe = bytes(safe)
        ###
        # Drop non-ASCII byte values from the safe set.
        safe = bytes([c for c in safe if c < 128])
    if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
        # Every byte is safe: no quoting needed, just decode to ASCII str.
        return bs.decode()
    try:
        quoter = _safe_quoters[safe]
    except KeyError:
        # Build and cache a per-safe-set Quoter (bound __getitem__).
        _safe_quoters[safe] = quoter = Quoter(safe).__getitem__
    return str('').join([quoter(char) for char in bs])
| 746 |
def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    The query arg may be either a string or a bytes type. When query arg is a
    string, the safe, encoding and error parameters are sent the quote_plus for
    encoding.
    """

    if hasattr(query, "items"):
        # Mapping: iterate its (key, value) pairs.
        query = query.items()
    else:
        # It's a bother at times that strings and string-like objects are
        # sequences.
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # Zero-length sequences of all types will get here and succeed,
            # but that's a minor nit.  Since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty, va, tb = sys.exc_info()
            raise_with_traceback(TypeError("not a valid non-string sequence "
                                           "or mapping object"), tb)

    l = []
    if not doseq:
        # Simple case: every value becomes exactly one k=v pair.
        for k, v in query:
            if isinstance(k, bytes):
                k = quote_plus(k, safe)
            else:
                k = quote_plus(str(k), safe, encoding, errors)

            if isinstance(v, bytes):
                v = quote_plus(v, safe)
            else:
                v = quote_plus(str(v), safe, encoding, errors)
            l.append(k + '=' + v)
    else:
        for k, v in query:
            if isinstance(k, bytes):
                k = quote_plus(k, safe)
            else:
                k = quote_plus(str(k), safe, encoding, errors)

            if isinstance(v, bytes):
                v = quote_plus(v, safe)
                l.append(k + '=' + v)
            elif isinstance(v, str):
                v = quote_plus(v, safe, encoding, errors)
                l.append(k + '=' + v)
            else:
                try:
                    # Is this a sufficient test for sequence-ness?
                    x = len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v), safe, encoding, errors)
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence
                    for elt in v:
                        if isinstance(elt, bytes):
                            elt = quote_plus(elt, safe)
                        else:
                            elt = quote_plus(str(elt), safe, encoding, errors)
                        l.append(k + '=' + elt)
    return str('&').join(l)
| 824 |
| 825 # Utilities to parse URLs (most of these return None for missing parts): |
| 826 # unwrap('<URL:type://host/path>') --> 'type://host/path' |
| 827 # splittype('type:opaquestring') --> 'type', 'opaquestring' |
| 828 # splithost('//host[:port]/path') --> 'host[:port]', '/path' |
| 829 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' |
| 830 # splitpasswd('user:passwd') -> 'user', 'passwd' |
| 831 # splitport('host:port') --> 'host', 'port' |
| 832 # splitquery('/path?query') --> '/path', 'query' |
| 833 # splittag('/path#tag') --> '/path', 'tag' |
| 834 # splitattr('/path;attr1=value1;attr2=value2;...') -> |
| 835 # '/path', ['attr1=value1', 'attr2=value2', ...] |
| 836 # splitvalue('attr=value') --> 'attr', 'value' |
| 837 # urllib.parse.unquote('abc%20def') -> 'abc def' |
# quote('abc def') -> 'abc%20def'
| 839 |
def to_bytes(url):
    """to_bytes(u"URL") --> 'URL'."""
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of to_bytes()
    if not isinstance(url, str):
        # Already bytes (or something else): pass through untouched.
        return url
    try:
        return url.encode("ASCII").decode()
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
| 852 |
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
    url = str(url).strip()
    # Strip a single pair of angle brackets, then an optional 'URL:'
    # prefix inside them.
    if url.startswith('<') and url.endswith('>'):
        url = url[1:-1].strip()
        if url.startswith('URL:'):
            url = url[4:].strip()
    return url
| 860 |
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
    global _typeprog
    # Compile lazily on first use and cache at module level.
    if _typeprog is None:
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme = found.group(1)
    # Scheme is case-insensitive; normalise to lowercase.
    return scheme.lower(), url[len(scheme) + 1:]
| 874 |
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
    global _hostprog
    # Compile lazily on first use and cache at module level.
    if _hostprog is None:
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if found is None:
        return None, url
    netloc, remainder = found.group(1, 2)
    # Ensure a non-empty remainder is rooted ('?q' -> '/?q').
    if remainder and not remainder.startswith('/'):
        remainder = '/' + remainder
    return netloc, remainder
| 891 |
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
    global _userprog
    # Compile lazily on first use and cache at module level.
    # Greedy first group splits on the LAST '@' in the string.
    if _userprog is None:
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return found.group(1, 2)
| 903 |
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    global _passwdprog
    # Compile lazily on first use and cache at module level.
    # re.S lets the password part span newlines.
    if _passwdprog is None:
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$', re.S)

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.group(1, 2)
| 915 |
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    Only splits when the part after the last ':' is all digits;
    otherwise returns (host, None) unchanged.
    """
    global _portprog
    # Compile lazily on first use and cache at module level.
    if _portprog is None:
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.group(1, 2)
| 928 |
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number are found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    # Compile lazily on first use and cache at module level.
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        # No ':' at all: hand back the caller-supplied default port.
        return host, defport
    name, port = found.group(1, 2)
    if port:
        try:
            nport = int(port)
        except ValueError:
            # ':' present but not a number.
            nport = None
    else:
        # Trailing ':' with nothing after it.
        nport = None
    return name, nport
| 950 |
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    Splits on the last '?' in the URL; returns (url, None) when no
    '?' is present.
    """
    global _queryprog
    if _queryprog is None:
        import re
        # Raw string: '\?' inside a plain literal is an invalid escape
        # sequence (DeprecationWarning since 3.6, SyntaxWarning in 3.12+).
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None
| 962 |
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    Splits on the last '#'; returns (url, None) when no '#' is present.
    """
    global _tagprog
    # Compile lazily on first use and cache at module level.
    if _tagprog is None:
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
| 974 |
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    # First ';'-delimited piece is the path; the rest are attributes
    # (an empty list when there is no ';' at all).
    pieces = url.split(';')
    return pieces[0], pieces[1:]
| 980 |
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    Splits on the first '='; returns (attr, None) when no '=' is present.
    """
    global _valueprog
    # Compile lazily on first use and cache at module level.
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1, 2)
OLD | NEW |