| OLD | NEW | 
|    1 # -*- coding: utf-8 -*- |    1 # -*- coding: utf-8 -*- | 
|    2  |    2  | 
|    3 """ |    3 """ | 
|    4 requests.utils |    4 requests.utils | 
|    5 ~~~~~~~~~~~~~~ |    5 ~~~~~~~~~~~~~~ | 
|    6  |    6  | 
|    7 This module provides utility functions that are used within Requests |    7 This module provides utility functions that are used within Requests | 
|    8 that are also useful for external consumption. |    8 that are also useful for external consumption. | 
|    9  |    9  | 
|   10 """ |   10 """ | 
|   11  |   11  | 
|   12 import cgi |   12 import cgi | 
|   13 import codecs |   13 import codecs | 
|   14 import collections |   14 import collections | 
|   15 import os |   15 import os | 
|   16 import platform |   16 import platform | 
|   17 import re |   17 import re | 
|   18 import sys |   18 import sys | 
|   19 from netrc import netrc, NetrcParseError |   19 from netrc import netrc, NetrcParseError | 
|   20  |   20  | 
|   21 from . import __version__ |   21 from . import __version__ | 
|   22 from . import certs |   22 from . import certs | 
|   23 from .compat import parse_http_list as _parse_list_header |   23 from .compat import parse_http_list as _parse_list_header | 
|   24 from .compat import quote, urlparse, bytes, str, OrderedDict, urlunparse |   24 from .compat import (quote, urlparse, bytes, str, OrderedDict, urlunparse, | 
 |   25                      is_py2, is_py3, builtin_str, getproxies, proxy_bypass) | 
|   25 from .cookies import RequestsCookieJar, cookiejar_from_dict |   26 from .cookies import RequestsCookieJar, cookiejar_from_dict | 
|   26 from .structures import CaseInsensitiveDict |   27 from .structures import CaseInsensitiveDict | 
 |   28 from .exceptions import MissingSchema, InvalidURL | 
|   27  |   29  | 
|   28 _hush_pyflakes = (RequestsCookieJar,) |   30 _hush_pyflakes = (RequestsCookieJar,) | 
|   29  |   31  | 
|   30 NETRC_FILES = ('.netrc', '_netrc') |   32 NETRC_FILES = ('.netrc', '_netrc') | 
|   31  |   33  | 
|   32 DEFAULT_CA_BUNDLE_PATH = certs.where() |   34 DEFAULT_CA_BUNDLE_PATH = certs.where() | 
|   33  |   35  | 
|   34  |   36  | 
|   35 def dict_to_sequence(d): |   37 def dict_to_sequence(d): | 
|   36     """Returns an internal sequence dictionary update.""" |   38     """Returns an internal sequence dictionary update.""" | 
| (...skipping 220 matching lines...) | 
|  257     return cj |  259     return cj | 
|  258  |  260  | 
|  259  |  261  | 
|  260 def get_encodings_from_content(content): |  262 def get_encodings_from_content(content): | 
|  261     """Returns encodings from given content string. |  263     """Returns encodings from given content string. | 
|  262  |  264  | 
|  263     :param content: bytestring to extract encodings from. |  265     :param content: bytestring to extract encodings from. | 
|  264     """ |  266     """ | 
|  265  |  267  | 
|  266     charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I) |  268     charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I) | 
 |  269     pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I) | 
 |  270     xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]') | 
|  267  |  271  | 
|  268     return charset_re.findall(content) |  272     return (charset_re.findall(content) + | 
 |  273             pragma_re.findall(content) + | 
 |  274             xml_re.findall(content)) | 
|  269  |  275  | 
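The new side widens detection beyond `<meta charset>` to the HTTP-pragma `<meta content=...>` form and XML declarations. A quick sketch of what the patterns pick up, assuming the patched module is importable as `requests.utils` (inputs invented for illustration):

```python
from requests.utils import get_encodings_from_content

print(get_encodings_from_content('<meta charset="utf-8">'))                     # ['utf-8']
print(get_encodings_from_content('<?xml version="1.0" encoding="latin-1"?>'))   # ['latin-1']
# The pragma form is also caught:
print(get_encodings_from_content(
    '<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">'))
# ['iso-8859-1']
```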
|  270  |  276  | 
|  271 def get_encoding_from_headers(headers): |  277 def get_encoding_from_headers(headers): | 
|  272     """Returns encodings from given HTTP Header Dict. |  278     """Returns encodings from given HTTP Header Dict. | 
|  273  |  279  | 
|  274     :param headers: dictionary to extract encoding from. |  280     :param headers: dictionary to extract encoding from. | 
|  275     """ |  281     """ | 
|  276  |  282  | 
|  277     content_type = headers.get('content-type') |  283     content_type = headers.get('content-type') | 
|  278  |  284  | 
| (...skipping 15 matching lines...) | 
|  294     if r.encoding is None: |  300     if r.encoding is None: | 
|  295         for item in iterator: |  301         for item in iterator: | 
|  296             yield item |  302             yield item | 
|  297         return |  303         return | 
|  298  |  304  | 
|  299     decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace') |  305     decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace') | 
|  300     for chunk in iterator: |  306     for chunk in iterator: | 
|  301         rv = decoder.decode(chunk) |  307         rv = decoder.decode(chunk) | 
|  302         if rv: |  308         if rv: | 
|  303             yield rv |  309             yield rv | 
|  304     rv = decoder.decode('', final=True) |  310     rv = decoder.decode(b'', final=True) | 
|  305     if rv: |  311     if rv: | 
|  306         yield rv |  312         yield rv | 
|  307  |  313  | 
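The `b''` flush fix matters on Python 3, where the incremental decoder only accepts bytes. A minimal sketch of the decode-per-chunk pattern used above, showing why the decoder must be stateful:

```python
import codecs

decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')
chunks = [b'caf\xc3', b'\xa9']                 # 'café' split inside the two-byte é
pieces = [decoder.decode(c) for c in chunks]   # ['caf', 'é'] -- \xc3 is buffered
pieces.append(decoder.decode(b'', final=True)) # flush; must be bytes on Python 3
print(''.join(pieces))                         # café
```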
|  308  |  314  | 
|  309 def iter_slices(string, slice_length): |  315 def iter_slices(string, slice_length): | 
|  310     """Iterate over slices of a string.""" |  316     """Iterate over slices of a string.""" | 
|  311     pos = 0 |  317     pos = 0 | 
|  312     while pos < len(string): |  318     while pos < len(string): | 
|  313         yield string[pos:pos + slice_length] |  319         yield string[pos:pos + slice_length] | 
|  314         pos += slice_length |  320         pos += slice_length | 
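For reference, the generator yields fixed-width windows, with the last slice possibly shorter:

```python
from requests.utils import iter_slices

# Hypothetical usage: chunk an already-loaded body into 3-character slices.
print(list(iter_slices('abcdefg', 3)))   # ['abc', 'def', 'g']
```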
| (...skipping 39 matching lines...) | 
|  354  |  360  | 
|  355  |  361  | 
|  356 def unquote_unreserved(uri): |  362 def unquote_unreserved(uri): | 
|  357     """Un-escape any percent-escape sequences in a URI that are unreserved |  363     """Un-escape any percent-escape sequences in a URI that are unreserved | 
|  358     characters. This leaves all reserved, illegal and non-ASCII bytes encoded. |  364     characters. This leaves all reserved, illegal and non-ASCII bytes encoded. | 
|  359     """ |  365     """ | 
|  360     parts = uri.split('%') |  366     parts = uri.split('%') | 
|  361     for i in range(1, len(parts)): |  367     for i in range(1, len(parts)): | 
|  362         h = parts[i][0:2] |  368         h = parts[i][0:2] | 
|  363         if len(h) == 2 and h.isalnum(): |  369         if len(h) == 2 and h.isalnum(): | 
|  364             c = chr(int(h, 16)) |  370             try: | 
 |  371                 c = chr(int(h, 16)) | 
 |  372             except ValueError: | 
 |  373                 raise InvalidURL("Invalid percent-escape sequence: '%s'" % h) | 
 |  374  | 
|  365             if c in UNRESERVED_SET: |  375             if c in UNRESERVED_SET: | 
|  366                 parts[i] = c + parts[i][2:] |  376                 parts[i] = c + parts[i][2:] | 
|  367             else: |  377             else: | 
|  368                 parts[i] = '%' + parts[i] |  378                 parts[i] = '%' + parts[i] | 
|  369         else: |  379         else: | 
|  370             parts[i] = '%' + parts[i] |  380             parts[i] = '%' + parts[i] | 
|  371     return ''.join(parts) |  381     return ''.join(parts) | 
|  372  |  382  | 
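The new try/except turns a previously unhandled `ValueError` (e.g. `%zz`, which is alphanumeric but not valid hex) into an `InvalidURL`. A sketch of both paths, assuming the patched module:

```python
from requests.utils import unquote_unreserved
from requests.exceptions import InvalidURL

# %41 is 'A' (unreserved) and gets decoded; %2F is '/' (reserved) and is kept.
print(unquote_unreserved('http://example.com/%41%2F'))  # http://example.com/A%2F

try:
    unquote_unreserved('http://example.com/%zz')  # alnum but not valid hex
except InvalidURL as e:
    print(e)  # Invalid percent-escape sequence: 'zz'
```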
|  373  |  383  | 
|  374 def requote_uri(uri): |  384 def requote_uri(uri): | 
|  375     """Re-quote the given URI. |  385     """Re-quote the given URI. | 
|  376  |  386  | 
|  377     This function passes the given URI through an unquote/quote cycle to |  387     This function passes the given URI through an unquote/quote cycle to | 
|  378     ensure that it is fully and consistently quoted. |  388     ensure that it is fully and consistently quoted. | 
|  379     """ |  389     """ | 
|  380     # Unquote only the unreserved characters |  390     # Unquote only the unreserved characters | 
|  381     # Then quote only illegal characters (do not quote reserved, unreserved, |  391     # Then quote only illegal characters (do not quote reserved, unreserved, | 
|  382     # or '%') |  392     # or '%') | 
|  383     return quote(unquote_unreserved(uri), safe="!#$%&'()*+,/:;=?@[]~") |  393     return quote(unquote_unreserved(uri), safe="!#$%&'()*+,/:;=?@[]~") | 
|  384  |  394  | 
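Taken together with `unquote_unreserved`, the cycle is idempotent: already-quoted reserved characters survive, and raw illegal characters are quoted exactly once. For example:

```python
from requests.utils import requote_uri

print(requote_uri('http://example.com/a b?q=%2Fpath'))
# http://example.com/a%20b?q=%2Fpath  -- the space is quoted, %2F is untouched
print(requote_uri(requote_uri('http://example.com/a b')))  # stable on re-application
```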
|  385  |  395  | 
|  386 def get_environ_proxies(url): |  396 def get_environ_proxies(url): | 
|  387     """Return a dict of environment proxies.""" |  397     """Return a dict of environment proxies.""" | 
|  388  |  398  | 
|  389     proxy_keys = [ |  | 
|  390         'all', |  | 
|  391         'http', |  | 
|  392         'https', |  | 
|  393         'ftp', |  | 
|  394         'socks' |  | 
|  395     ] |  | 
|  396  |  | 
|  397     get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper()) |  399     get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper()) | 
|  398  |  400  | 
|  399     # First check whether no_proxy is defined. If it is, check that the URL |  401     # First check whether no_proxy is defined. If it is, check that the URL | 
|  400     # we're getting isn't in the no_proxy list. |  402     # we're getting isn't in the no_proxy list. | 
|  401     no_proxy = get_proxy('no_proxy') |  403     no_proxy = get_proxy('no_proxy') | 
 |  404     netloc = urlparse(url).netloc | 
|  402  |  405  | 
|  403     if no_proxy: |  406     if no_proxy: | 
|  404         # We need to check whether we match here. We need to see if we match |  407         # We need to check whether we match here. We need to see if we match | 
|  405         # the end of the netloc, both with and without the port. |  408         # the end of the netloc, both with and without the port. | 
|  406         no_proxy = no_proxy.split(',') |  409         no_proxy = no_proxy.replace(' ', '').split(',') | 
|  407         netloc = urlparse(url).netloc |  | 
|  408  |  410  | 
|  409         for host in no_proxy: |  411         for host in no_proxy: | 
|  410             if netloc.endswith(host) or netloc.split(':')[0].endswith(host): |  412             if netloc.endswith(host) or netloc.split(':')[0].endswith(host): | 
|  411                 # The URL does match something in no_proxy, so we don't want |  413                 # The URL does match something in no_proxy, so we don't want | 
|  412                 # to apply the proxies on this URL. |  414                 # to apply the proxies on this URL. | 
|  413                 return {} |  415                 return {} | 
|  414  |  416  | 
 |  417     # If the system proxy settings indicate that this URL should be bypassed, | 
 |  418     # don't proxy. | 
 |  419     if proxy_bypass(netloc): | 
 |  420         return {} | 
 |  421  | 
|  415     # If we get here, we either didn't have no_proxy set or we're not going |  422     # If we get here, we either didn't have no_proxy set or we're not going | 
|  416     # anywhere that no_proxy applies to. |  423     # anywhere that no_proxy applies to, and the system settings don't require | 
|  417     proxies = [(key, get_proxy(key + '_proxy')) for key in proxy_keys] |  424     # bypassing the proxy for the current URL. | 
|  418     return dict([(key, val) for (key, val) in proxies if val]) |  425     return getproxies() | 
|  419  |  426  | 
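The rewrite drops the hand-rolled `*_proxy` scan in favor of `urllib`'s `getproxies()`, and additionally honors platform bypass rules via `proxy_bypass`. A rough sketch of the environment-driven behavior (variable values invented; on macOS and Windows, `proxy_bypass` and `getproxies` may also consult system proxy settings):

```python
import os
from requests.utils import get_environ_proxies

os.environ['HTTP_PROXY'] = 'http://proxy.internal:3128'   # hypothetical proxy
os.environ['NO_PROXY'] = 'localhost, .example.com'        # spaces now tolerated

print(get_environ_proxies('http://api.example.com/'))  # {} -- matches no_proxy
proxies = get_environ_proxies('http://other.org/')
print(proxies.get('http'))                             # http://proxy.internal:3128
```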
|  420  |  427  | 
|  421 def default_user_agent(): |  428 def default_user_agent(): | 
|  422     """Return a string representing the default user agent.""" |  429     """Return a string representing the default user agent.""" | 
|  423     _implementation = platform.python_implementation() |  430     _implementation = platform.python_implementation() | 
|  424  |  431  | 
|  425     if _implementation == 'CPython': |  432     if _implementation == 'CPython': | 
|  426         _implementation_version = platform.python_version() |  433         _implementation_version = platform.python_version() | 
|  427     elif _implementation == 'PyPy': |  434     elif _implementation == 'PyPy': | 
|  428         _implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major, |  435         _implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major, | 
| (...skipping 90 matching lines...) | 
|  519         # Did not detect 2 valid UTF-16 ascii-range characters |  526         # Did not detect 2 valid UTF-16 ascii-range characters | 
|  520     if nullcount == 3: |  527     if nullcount == 3: | 
|  521         if sample[:3] == _null3: |  528         if sample[:3] == _null3: | 
|  522             return 'utf-32-be' |  529             return 'utf-32-be' | 
|  523         if sample[1:] == _null3: |  530         if sample[1:] == _null3: | 
|  524             return 'utf-32-le' |  531             return 'utf-32-le' | 
|  525         # Did not detect a valid UTF-32 ascii-range character |  532         # Did not detect a valid UTF-32 ascii-range character | 
|  526     return None |  533     return None | 
|  527  |  534  | 
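For context, the null-count heuristic above works because any ASCII-range code point encodes with exactly three zero bytes in UTF-32 (and one in UTF-16), and which side the nulls fall on reveals the endianness. A quick check (the enclosing function, elided above, is presumably requests' `guess_json_utf` operating on the first four bytes of the payload):

```python
# JSON always starts with an ASCII-range character, so the first four bytes
# carry a distinctive null-byte pattern per encoding.
for enc in ('utf-32-be', 'utf-32-le'):
    sample = u'{'.encode(enc)[:4]
    print(enc, repr(sample), sample.count(b'\x00'))
# utf-32-be b'\x00\x00\x00{' 3  -> nulls lead: big-endian
# utf-32-le b'{\x00\x00\x00' 3  -> nulls trail: little-endian
```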
|  528  |  535  | 
|  529 def prepend_scheme_if_needed(url, new_scheme): |  536 def except_on_missing_scheme(url): | 
|  530     '''Given a URL that may or may not have a scheme, prepend the given scheme. |  537     """Given a URL, raise a MissingSchema exception if the scheme is missing. | 
|  531     Does not replace a present scheme with the one provided as an argument.''' |  538     """ | 
|  532     scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme) |  539     scheme, netloc, path, params, query, fragment = urlparse(url) | 
|  533  |  540  | 
|  534     # urlparse is a finicky beast, and sometimes decides that there isn't a |  541     if not scheme: | 
|  535     # netloc present. Assume that it's being over-cautious, and switch netloc |  542         raise MissingSchema('Proxy URLs must have explicit schemes.') | 
|  536     # and path if urlparse decided there was no netloc. |  | 
|  537     if not netloc: |  | 
|  538         netloc, path = path, netloc |  | 
|  539  |  | 
|  540     return urlunparse((scheme, netloc, path, params, query, fragment)) |  | 
|  541  |  543  | 
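The replacement is deliberately stricter than the old `prepend_scheme_if_needed`: rather than guessing a scheme for a proxy URL, it fails fast. A sketch of both outcomes, assuming the patched module:

```python
from requests.utils import except_on_missing_scheme
from requests.exceptions import MissingSchema

except_on_missing_scheme('http://proxy.internal:3128')  # fine, scheme present

try:
    except_on_missing_scheme('//proxy.internal:3128')   # scheme-relative: no scheme
except MissingSchema as e:
    print(e)  # Proxy URLs must have explicit schemes.
```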
|  542  |  544  | 
|  543 def get_auth_from_url(url): |  545 def get_auth_from_url(url): | 
|  544     """Given a url with authentication components, extract them into a tuple of |  546     """Given a url with authentication components, extract them into a tuple of | 
|  545     username,password.""" |  547     username,password.""" | 
|  546     if url: |  548     if url: | 
|  547         parsed = urlparse(url) |  549         parsed = urlparse(url) | 
|  548         return (parsed.username, parsed.password) |  550         return (parsed.username, parsed.password) | 
|  549     else: |  551     else: | 
|  550         return ('', '') |  552         return ('', '') | 
 |  553  | 
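A quick illustration; note that for a non-empty URL without credentials, `urlparse` yields `None` rather than empty strings:

```python
from requests.utils import get_auth_from_url

print(get_auth_from_url('http://user:secret@example.com/'))  # ('user', 'secret')
print(get_auth_from_url('http://example.com/'))              # (None, None)
print(get_auth_from_url(''))                                 # ('', '')
```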
 |  554  | 
 |  555 def to_native_string(string, encoding='ascii'): | 
 |  556     """ | 
 |  557     Given a string object, regardless of type, returns a representation of that | 
 |  558     string in the native string type, encoding and decoding where necessary. | 
 |  559     This assumes ASCII unless told otherwise. | 
 |  560     """ | 
 |  561     out = None | 
 |  562  | 
 |  563     if isinstance(string, builtin_str): | 
 |  564         out = string | 
 |  565     else: | 
 |  566         if is_py2: | 
 |  567             out = string.encode(encoding) | 
 |  568         else: | 
 |  569             out = string.decode(encoding) | 
 |  570  | 
 |  571     return out | 
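A minimal sketch of the two conversion paths: on Python 2 the native string type is `bytes`, on Python 3 it is `str`, so only input of the "wrong" type for the running interpreter is converted:

```python
from requests.utils import to_native_string

print(to_native_string(u'token'))   # encoded to bytes on py2; passed through on py3
print(to_native_string(b'token'))   # passed through on py2; decoded to str on py3
# Non-ASCII input on the converting path needs an explicit encoding:
native = to_native_string(b'caf\xc3\xa9', 'utf-8')   # decoded on py3 only
```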