| OLD | NEW |
| 1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
| 2 | 2 |
| 3 """ | 3 """ |
| 4 requests.utils | 4 requests.utils |
| 5 ~~~~~~~~~~~~~~ | 5 ~~~~~~~~~~~~~~ |
| 6 | 6 |
| 7 This module provides utility functions that are used within Requests | 7 This module provides utility functions that are used within Requests |
| 8 that are also useful for external consumption. | 8 that are also useful for external consumption. |
| 9 | 9 |
| 10 """ | 10 """ |
| 11 | 11 |
| 12 import cgi | 12 import cgi |
| 13 import codecs | 13 import codecs |
| 14 import collections | 14 import collections |
| 15 import os | 15 import os |
| 16 import platform | 16 import platform |
| 17 import re | 17 import re |
| 18 import sys | 18 import sys |
| 19 from netrc import netrc, NetrcParseError | 19 from netrc import netrc, NetrcParseError |
| 20 | 20 |
| 21 from . import __version__ | 21 from . import __version__ |
| 22 from . import certs | 22 from . import certs |
| 23 from .compat import parse_http_list as _parse_list_header | 23 from .compat import parse_http_list as _parse_list_header |
| 24 from .compat import quote, urlparse, bytes, str, OrderedDict, urlunparse | 24 from .compat import (quote, urlparse, bytes, str, OrderedDict, urlunparse, |
| 25 is_py2, is_py3, builtin_str, getproxies, proxy_bypass) |
| 25 from .cookies import RequestsCookieJar, cookiejar_from_dict | 26 from .cookies import RequestsCookieJar, cookiejar_from_dict |
| 26 from .structures import CaseInsensitiveDict | 27 from .structures import CaseInsensitiveDict |
| 28 from .exceptions import MissingSchema, InvalidURL |
| 27 | 29 |
| 28 _hush_pyflakes = (RequestsCookieJar,) | 30 _hush_pyflakes = (RequestsCookieJar,) |
| 29 | 31 |
| 30 NETRC_FILES = ('.netrc', '_netrc') | 32 NETRC_FILES = ('.netrc', '_netrc') |
| 31 | 33 |
| 32 DEFAULT_CA_BUNDLE_PATH = certs.where() | 34 DEFAULT_CA_BUNDLE_PATH = certs.where() |
| 33 | 35 |
| 34 | 36 |
| 35 def dict_to_sequence(d): | 37 def dict_to_sequence(d): |
| 36 """Returns an internal sequence dictionary update.""" | 38 """Returns an internal sequence dictionary update.""" |
| (...skipping 220 matching lines...) |
| 257 return cj | 259 return cj |
| 258 | 260 |
| 259 | 261 |
| 260 def get_encodings_from_content(content): | 262 def get_encodings_from_content(content): |
| 261 """Returns encodings from given content string. | 263 """Returns encodings from given content string. |
| 262 | 264 |
| 263 :param content: bytestring to extract encodings from. | 265 :param content: bytestring to extract encodings from. |
| 264 """ | 266 """ |
| 265 | 267 |
| 266 charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I) | 268 charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I) |
| 269 pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I) |
| 270 xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]') |
| 267 | 271 |
| 268 return charset_re.findall(content) | 272 return (charset_re.findall(content) + |
| 273 pragma_re.findall(content) + |
| 274 xml_re.findall(content)) |
| 269 | 275 |
| 270 | 276 |
| 271 def get_encoding_from_headers(headers): | 277 def get_encoding_from_headers(headers): |
| 272 """Returns encodings from given HTTP Header Dict. | 278 """Returns encodings from given HTTP Header Dict. |
| 273 | 279 |
| 274 :param headers: dictionary to extract encoding from. | 280 :param headers: dictionary to extract encoding from. |
| 275 """ | 281 """ |
| 276 | 282 |
| 277 content_type = headers.get('content-type') | 283 content_type = headers.get('content-type') |
| 278 | 284 |
| (...skipping 15 matching lines...) |
| 294 if r.encoding is None: | 300 if r.encoding is None: |
| 295 for item in iterator: | 301 for item in iterator: |
| 296 yield item | 302 yield item |
| 297 return | 303 return |
| 298 | 304 |
| 299 decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace') | 305 decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace') |
| 300 for chunk in iterator: | 306 for chunk in iterator: |
| 301 rv = decoder.decode(chunk) | 307 rv = decoder.decode(chunk) |
| 302 if rv: | 308 if rv: |
| 303 yield rv | 309 yield rv |
| 304 rv = decoder.decode('', final=True) | 310 rv = decoder.decode(b'', final=True) |
| 305 if rv: | 311 if rv: |
| 306 yield rv | 312 yield rv |
| 307 | 313 |
| 308 | 314 |
| 309 def iter_slices(string, slice_length): | 315 def iter_slices(string, slice_length): |
| 310 """Iterate over slices of a string.""" | 316 """Iterate over slices of a string.""" |
| 311 pos = 0 | 317 pos = 0 |
| 312 while pos < len(string): | 318 while pos < len(string): |
| 313 yield string[pos:pos + slice_length] | 319 yield string[pos:pos + slice_length] |
| 314 pos += slice_length | 320 pos += slice_length |
| (...skipping 39 matching lines...) |
| 354 | 360 |
| 355 | 361 |
| 356 def unquote_unreserved(uri): | 362 def unquote_unreserved(uri): |
| 357 """Un-escape any percent-escape sequences in a URI that are unreserved | 363 """Un-escape any percent-escape sequences in a URI that are unreserved |
| 358 characters. This leaves all reserved, illegal and non-ASCII bytes encoded. | 364 characters. This leaves all reserved, illegal and non-ASCII bytes encoded. |
| 359 """ | 365 """ |
| 360 parts = uri.split('%') | 366 parts = uri.split('%') |
| 361 for i in range(1, len(parts)): | 367 for i in range(1, len(parts)): |
| 362 h = parts[i][0:2] | 368 h = parts[i][0:2] |
| 363 if len(h) == 2 and h.isalnum(): | 369 if len(h) == 2 and h.isalnum(): |
| 364 c = chr(int(h, 16)) | 370 try: |
| 371 c = chr(int(h, 16)) |
| 372 except ValueError: |
| 373 raise InvalidURL("Invalid percent-escape sequence: '%s'" % h) |
| 374 |
| 365 if c in UNRESERVED_SET: | 375 if c in UNRESERVED_SET: |
| 366 parts[i] = c + parts[i][2:] | 376 parts[i] = c + parts[i][2:] |
| 367 else: | 377 else: |
| 368 parts[i] = '%' + parts[i] | 378 parts[i] = '%' + parts[i] |
| 369 else: | 379 else: |
| 370 parts[i] = '%' + parts[i] | 380 parts[i] = '%' + parts[i] |
| 371 return ''.join(parts) | 381 return ''.join(parts) |
| 372 | 382 |
| 373 | 383 |
| 374 def requote_uri(uri): | 384 def requote_uri(uri): |
| 375 """Re-quote the given URI. | 385 """Re-quote the given URI. |
| 376 | 386 |
| 377 This function passes the given URI through an unquote/quote cycle to | 387 This function passes the given URI through an unquote/quote cycle to |
| 378 ensure that it is fully and consistently quoted. | 388 ensure that it is fully and consistently quoted. |
| 379 """ | 389 """ |
| 380 # Unquote only the unreserved characters | 390 # Unquote only the unreserved characters |
| 381 # Then quote only illegal characters (do not quote reserved, unreserved, | 391 # Then quote only illegal characters (do not quote reserved, unreserved, |
| 382 # or '%') | 392 # or '%') |
| 383 return quote(unquote_unreserved(uri), safe="!#$%&'()*+,/:;=?@[]~") | 393 return quote(unquote_unreserved(uri), safe="!#$%&'()*+,/:;=?@[]~") |
| 384 | 394 |
| 385 | 395 |
| 386 def get_environ_proxies(url): | 396 def get_environ_proxies(url): |
| 387 """Return a dict of environment proxies.""" | 397 """Return a dict of environment proxies.""" |
| 388 | 398 |
| 389 proxy_keys = [ | |
| 390 'all', | |
| 391 'http', | |
| 392 'https', | |
| 393 'ftp', | |
| 394 'socks' | |
| 395 ] | |
| 396 | |
| 397 get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper()) | 399 get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper()) |
| 398 | 400 |
| 399 # First check whether no_proxy is defined. If it is, check that the URL | 401 # First check whether no_proxy is defined. If it is, check that the URL |
| 400 # we're getting isn't in the no_proxy list. | 402 # we're getting isn't in the no_proxy list. |
| 401 no_proxy = get_proxy('no_proxy') | 403 no_proxy = get_proxy('no_proxy') |
| 404 netloc = urlparse(url).netloc |
| 402 | 405 |
| 403 if no_proxy: | 406 if no_proxy: |
| 404 # We need to check whether we match here. We need to see if we match | 407 # We need to check whether we match here. We need to see if we match |
| 405 # the end of the netloc, both with and without the port. | 408 # the end of the netloc, both with and without the port. |
| 406 no_proxy = no_proxy.split(',') | 409 no_proxy = no_proxy.replace(' ', '').split(',') |
| 407 netloc = urlparse(url).netloc | |
| 408 | 410 |
| 409 for host in no_proxy: | 411 for host in no_proxy: |
| 410 if netloc.endswith(host) or netloc.split(':')[0].endswith(host): | 412 if netloc.endswith(host) or netloc.split(':')[0].endswith(host): |
| 411 # The URL does match something in no_proxy, so we don't want | 413 # The URL does match something in no_proxy, so we don't want |
| 412 # to apply the proxies on this URL. | 414 # to apply the proxies on this URL. |
| 413 return {} | 415 return {} |
| 414 | 416 |
| 417 # If the system proxy settings indicate that this URL should be bypassed, |
| 418 # don't proxy. |
| 419 if proxy_bypass(netloc): |
| 420 return {} |
| 421 |
| 415 # If we get here, we either didn't have no_proxy set or we're not going | 422 # If we get here, we either didn't have no_proxy set or we're not going |
| 416 # anywhere that no_proxy applies to. | 423 # anywhere that no_proxy applies to, and the system settings don't require |
| 417 proxies = [(key, get_proxy(key + '_proxy')) for key in proxy_keys] | 424 # bypassing the proxy for the current URL. |
| 418 return dict([(key, val) for (key, val) in proxies if val]) | 425 return getproxies() |
| 419 | 426 |
| 420 | 427 |
| 421 def default_user_agent(): | 428 def default_user_agent(): |
| 422 """Return a string representing the default user agent.""" | 429 """Return a string representing the default user agent.""" |
| 423 _implementation = platform.python_implementation() | 430 _implementation = platform.python_implementation() |
| 424 | 431 |
| 425 if _implementation == 'CPython': | 432 if _implementation == 'CPython': |
| 426 _implementation_version = platform.python_version() | 433 _implementation_version = platform.python_version() |
| 427 elif _implementation == 'PyPy': | 434 elif _implementation == 'PyPy': |
| 428 _implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major, | 435 _implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major, |
| (...skipping 90 matching lines...) |
| 519 # Did not detect 2 valid UTF-16 ascii-range characters | 526 # Did not detect 2 valid UTF-16 ascii-range characters |
| 520 if nullcount == 3: | 527 if nullcount == 3: |
| 521 if sample[:3] == _null3: | 528 if sample[:3] == _null3: |
| 522 return 'utf-32-be' | 529 return 'utf-32-be' |
| 523 if sample[1:] == _null3: | 530 if sample[1:] == _null3: |
| 524 return 'utf-32-le' | 531 return 'utf-32-le' |
| 525 # Did not detect a valid UTF-32 ascii-range character | 532 # Did not detect a valid UTF-32 ascii-range character |
| 526 return None | 533 return None |
| 527 | 534 |
| 528 | 535 |
| 529 def prepend_scheme_if_needed(url, new_scheme): | 536 def except_on_missing_scheme(url): |
| 530 '''Given a URL that may or may not have a scheme, prepend the given scheme. | 537 """Given a URL, raise a MissingSchema exception if the scheme is missing. |
| 531 Does not replace a present scheme with the one provided as an argument.''' | 538 """ |
| 532 scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme) | 539 scheme, netloc, path, params, query, fragment = urlparse(url) |
| 533 | 540 |
| 534 # urlparse is a finicky beast, and sometimes decides that there isn't a | 541 if not scheme: |
| 535 # netloc present. Assume that it's being over-cautious, and switch netloc | 542 raise MissingSchema('Proxy URLs must have explicit schemes.') |
| 536 # and path if urlparse decided there was no netloc. | |
| 537 if not netloc: | |
| 538 netloc, path = path, netloc | |
| 539 | |
| 540 return urlunparse((scheme, netloc, path, params, query, fragment)) | |
| 541 | 543 |
| 542 | 544 |
| 543 def get_auth_from_url(url): | 545 def get_auth_from_url(url): |
| 544 """Given a url with authentication components, extract them into a tuple of | 546 """Given a url with authentication components, extract them into a tuple of |
| 545 username,password.""" | 547 username,password.""" |
| 546 if url: | 548 if url: |
| 547 parsed = urlparse(url) | 549 parsed = urlparse(url) |
| 548 return (parsed.username, parsed.password) | 550 return (parsed.username, parsed.password) |
| 549 else: | 551 else: |
| 550 return ('', '') | 552 return ('', '') |
| 553 |
| 554 |
| 555 def to_native_string(string, encoding='ascii'): |
| 556 """ |
| 557 Given a string object, regardless of type, returns a representation of that |
| 558 string in the native string type, encoding and decoding where necessary. |
| 559 This assumes ASCII unless told otherwise. |
| 560 """ |
| 561 out = None |
| 562 |
| 563 if isinstance(string, builtin_str): |
| 564 out = string |
| 565 else: |
| 566 if is_py2: |
| 567 out = string.encode(encoding) |
| 568 else: |
| 569 out = string.decode(encoding) |
| 570 |
| 571 return out |
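The change to get_encodings_from_content (new lines 268-274) broadens detection from <meta charset> alone to the HTTP-pragma and XML-declaration forms. A minimal sketch of the resulting behaviour, using made-up snippets; note the patterns are text regexes, so on Python 3 the content must already be decoded to str despite the docstring's "bytestring":

    from requests.utils import get_encodings_from_content

    # <meta charset=...> is picked up by charset_re
    print(get_encodings_from_content('<meta charset="utf-8">'))
    # -> ['utf-8']

    # An XML declaration is picked up by the new xml_re (anchored at the start)
    print(get_encodings_from_content('<?xml version="1.0" encoding="ISO-8859-1"?>'))
    # -> ['ISO-8859-1']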
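The one-byte fix at new line 310 (b'' instead of '') matters on Python 3, where the incremental decoder buffers bytes and its flush call must also receive bytes; passing the text string '' raises a TypeError. A standalone sketch of the same pattern using only the stdlib:

    import codecs

    decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')
    chunks = [b'caf', b'\xc3', b'\xa9']   # 'café' split in the middle of a 2-byte character
    pieces = [decoder.decode(chunk) for chunk in chunks]
    pieces.append(decoder.decode(b'', final=True))   # flush any buffered partial sequence
    print(''.join(pieces))
    # -> café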
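unquote_unreserved now raises InvalidURL (imported at new line 28) instead of letting the ValueError from int(h, 16) escape when a percent sign is followed by two alphanumerics that are not valid hex. A sketch of the round-trip and of the new failure mode, with throwaway URLs:

    from requests.utils import requote_uri, unquote_unreserved
    from requests.exceptions import InvalidURL

    # %7E ('~', unreserved) is unescaped; %2F ('/', reserved) stays encoded
    print(unquote_unreserved('http://example.com/%7Euser/a%2Fb'))
    # -> http://example.com/~user/a%2Fb

    # requote_uri yields a consistently quoted form
    print(requote_uri('http://example.com/over there'))
    # -> http://example.com/over%20there

    try:
        unquote_unreserved('http://example.com/%zz')   # 'zz' is alnum but not hex
    except InvalidURL as exc:
        print(exc)   # Invalid percent-escape sequence: 'zz'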
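get_environ_proxies now delegates discovery to the stdlib's getproxies() and honours platform bypass rules via proxy_bypass(), rather than probing a fixed key list; no_proxy also tolerates spaces after commas (new line 409). A rough sketch, assuming a POSIX environment and made-up addresses:

    import os
    from requests.utils import get_environ_proxies

    os.environ['http_proxy'] = 'http://10.10.1.10:3128'   # made-up proxy
    os.environ['no_proxy'] = 'localhost, .internal.test'  # note the space: now handled

    print(get_environ_proxies('http://svc.internal.test/'))
    # -> {} (suffix-matched by no_proxy)
    print(get_environ_proxies('http://example.com/'))
    # -> includes the http proxy; the exact dict comes from getproxies()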
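prepend_scheme_if_needed is replaced by except_on_missing_scheme: instead of silently guessing a scheme, a proxy URL without one is now rejected with MissingSchema. A quick sketch:

    from requests.utils import except_on_missing_scheme
    from requests.exceptions import MissingSchema

    except_on_missing_scheme('http://proxy.example.com:3128')   # fine; returns None

    try:
        except_on_missing_scheme('proxy.example.com')           # no scheme
    except MissingSchema as exc:
        print(exc)   # Proxy URLs must have explicit schemes.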
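For context, get_auth_from_url (unchanged in this diff) is how userinfo is later pulled back out of such URLs:

    from requests.utils import get_auth_from_url

    print(get_auth_from_url('http://alice:s3cret@example.com/'))   # ('alice', 's3cret')
    print(get_auth_from_url(''))                                   # ('', '')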
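to_native_string is new in this diff: it normalises a value to the interpreter's native str type (decoding on Python 3, encoding on Python 2), assuming ASCII unless told otherwise. A sketch of the Python 3 behaviour:

    from requests.utils import to_native_string

    print(to_native_string(b'Basic QWxhZGRpbg=='))    # bytes are decoded to str
    print(to_native_string('already native'))         # native str passes through unchanged
    print(to_native_string(b'caf\xc3\xa9', 'utf-8'))  # non-ASCII needs an explicit encoding
    # -> café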