| Index: third_party/requests/utils.py
|
| diff --git a/third_party/requests/utils.py b/third_party/requests/utils.py
|
| index b21bf8fb76f7aee13a32932c850d6be658ea10fc..3ec61312aa263eb42eff4eb683b74261638cddd3 100644
|
| --- a/third_party/requests/utils.py
|
| +++ b/third_party/requests/utils.py
|
| @@ -21,9 +21,11 @@ from netrc import netrc, NetrcParseError
|
| from . import __version__
|
| from . import certs
|
| from .compat import parse_http_list as _parse_list_header
|
| -from .compat import quote, urlparse, bytes, str, OrderedDict, urlunparse
|
| +from .compat import (quote, urlparse, bytes, str, OrderedDict, urlunparse,
|
| + is_py2, is_py3, builtin_str, getproxies, proxy_bypass)
|
| from .cookies import RequestsCookieJar, cookiejar_from_dict
|
| from .structures import CaseInsensitiveDict
|
| +from .exceptions import MissingSchema, InvalidURL
|
|
|
| _hush_pyflakes = (RequestsCookieJar,)
|
|
|
| @@ -264,8 +266,12 @@ def get_encodings_from_content(content):
|
| """
|
|
|
| charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
|
| + pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
|
| + xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')
|
|
|
| - return charset_re.findall(content)
|
| + return (charset_re.findall(content) +
|
| + pragma_re.findall(content) +
|
| + xml_re.findall(content))
|
|
|
|
|
| def get_encoding_from_headers(headers):
|
| @@ -301,7 +307,7 @@ def stream_decode_response_unicode(iterator, r):
|
| rv = decoder.decode(chunk)
|
| if rv:
|
| yield rv
|
| - rv = decoder.decode('', final=True)
|
| + rv = decoder.decode(b'', final=True)
|
| if rv:
|
| yield rv
|
|
|
| @@ -361,7 +367,11 @@ def unquote_unreserved(uri):
|
| for i in range(1, len(parts)):
|
| h = parts[i][0:2]
|
| if len(h) == 2 and h.isalnum():
|
| - c = chr(int(h, 16))
|
| + try:
|
| + c = chr(int(h, 16))
|
| + except ValueError:
|
| + raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)
|
| +
|
| if c in UNRESERVED_SET:
|
| parts[i] = c + parts[i][2:]
|
| else:
|
| @@ -386,25 +396,17 @@ def requote_uri(uri):
|
| def get_environ_proxies(url):
|
| """Return a dict of environment proxies."""
|
|
|
| - proxy_keys = [
|
| - 'all',
|
| - 'http',
|
| - 'https',
|
| - 'ftp',
|
| - 'socks'
|
| - ]
|
| -
|
| get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())
|
|
|
| # First check whether no_proxy is defined. If it is, check that the URL
|
| # we're getting isn't in the no_proxy list.
|
| no_proxy = get_proxy('no_proxy')
|
| + netloc = urlparse(url).netloc
|
|
|
| if no_proxy:
|
| # We need to check whether we match here. We need to see if we match
|
| # the end of the netloc, both with and without the port.
|
| - no_proxy = no_proxy.split(',')
|
| - netloc = urlparse(url).netloc
|
| + no_proxy = no_proxy.replace(' ', '').split(',')
|
|
|
| for host in no_proxy:
|
| if netloc.endswith(host) or netloc.split(':')[0].endswith(host):
|
| @@ -412,10 +414,15 @@ def get_environ_proxies(url):
|
| # to apply the proxies on this URL.
|
| return {}
|
|
|
| + # If the system proxy settings indicate that this URL should be bypassed,
|
| + # don't proxy.
|
| + if proxy_bypass(netloc):
|
| + return {}
|
| +
|
| # If we get here, we either didn't have no_proxy set or we're not going
|
| - # anywhere that no_proxy applies to.
|
| - proxies = [(key, get_proxy(key + '_proxy')) for key in proxy_keys]
|
| - return dict([(key, val) for (key, val) in proxies if val])
|
| + # anywhere that no_proxy applies to, and the system settings don't require
|
| + # bypassing the proxy for the current URL.
|
| + return getproxies()
|
|
|
|
|
| def default_user_agent():
|
| @@ -526,18 +533,13 @@ def guess_json_utf(data):
|
| return None
|
|
|
|
|
| -def prepend_scheme_if_needed(url, new_scheme):
|
| - '''Given a URL that may or may not have a scheme, prepend the given scheme.
|
| - Does not replace a present scheme with the one provided as an argument.'''
|
| - scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)
|
| -
|
| - # urlparse is a finicky beast, and sometimes decides that there isn't a
|
| - # netloc present. Assume that it's being over-cautious, and switch netloc
|
| - # and path if urlparse decided there was no netloc.
|
| - if not netloc:
|
| - netloc, path = path, netloc
|
| +def except_on_missing_scheme(url):
|
| + """Given a URL, raise a MissingSchema exception if the scheme is missing.
|
| + """
|
| + scheme, netloc, path, params, query, fragment = urlparse(url)
|
|
|
| - return urlunparse((scheme, netloc, path, params, query, fragment))
|
| + if not scheme:
|
| + raise MissingSchema('Proxy URLs must have explicit schemes.')
|
|
|
|
|
| def get_auth_from_url(url):
|
| @@ -548,3 +550,22 @@ def get_auth_from_url(url):
|
| return (parsed.username, parsed.password)
|
| else:
|
| return ('', '')
|
| +
|
| +
|
| +def to_native_string(string, encoding='ascii'):
|
| + """
|
| + Given a string object, regardless of type, returns a representation of that
|
| + string in the native string type, encoding and decoding where necessary.
|
| + This assumes ASCII unless told otherwise.
|
| + """
|
| + out = None
|
| +
|
| + if isinstance(string, builtin_str):
|
| + out = string
|
| + else:
|
| + if is_py2:
|
| + out = string.encode(encoding)
|
| + else:
|
| + out = string.decode(encoding)
|
| +
|
| + return out
|
|
|