OLD | NEW |
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 | 2 |
3 """ | 3 """ |
4 requests.utils | 4 requests.utils |
5 ~~~~~~~~~~~~~~ | 5 ~~~~~~~~~~~~~~ |
6 | 6 |
7 This module provides utility functions that are used within Requests | 7 This module provides utility functions that are used within Requests |
8 that are also useful for external consumption. | 8 that are also useful for external consumption. |
9 | 9 |
10 """ | 10 """ |
11 | 11 |
12 import cgi | 12 import cgi |
13 import codecs | 13 import codecs |
14 import collections | 14 import collections |
15 import os | 15 import os |
16 import platform | 16 import platform |
17 import re | 17 import re |
18 import sys | 18 import sys |
19 from netrc import netrc, NetrcParseError | 19 from netrc import netrc, NetrcParseError |
20 | 20 |
21 from . import __version__ | 21 from . import __version__ |
22 from . import certs | 22 from . import certs |
23 from .compat import parse_http_list as _parse_list_header | 23 from .compat import parse_http_list as _parse_list_header |
24 from .compat import quote, urlparse, bytes, str, OrderedDict, urlunparse | 24 from .compat import (quote, urlparse, bytes, str, OrderedDict, urlunparse, |
| 25 is_py2, is_py3, builtin_str, getproxies, proxy_bypass) |
25 from .cookies import RequestsCookieJar, cookiejar_from_dict | 26 from .cookies import RequestsCookieJar, cookiejar_from_dict |
26 from .structures import CaseInsensitiveDict | 27 from .structures import CaseInsensitiveDict |
| 28 from .exceptions import MissingSchema, InvalidURL |
27 | 29 |
28 _hush_pyflakes = (RequestsCookieJar,) | 30 _hush_pyflakes = (RequestsCookieJar,) |
29 | 31 |
30 NETRC_FILES = ('.netrc', '_netrc') | 32 NETRC_FILES = ('.netrc', '_netrc') |
31 | 33 |
32 DEFAULT_CA_BUNDLE_PATH = certs.where() | 34 DEFAULT_CA_BUNDLE_PATH = certs.where() |
33 | 35 |
34 | 36 |
35 def dict_to_sequence(d): | 37 def dict_to_sequence(d): |
36 """Returns an internal sequence dictionary update.""" | 38 """Returns an internal sequence dictionary update.""" |
(...skipping 220 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
257 return cj | 259 return cj |
258 | 260 |
259 | 261 |
def get_encodings_from_content(content):
    """Returns encodings from given content string.

    Looks for an HTML5 ``<meta charset=...>`` declaration, an HTTP-equiv
    ``<meta ... content=...charset=...>`` pragma, and an XML
    ``<?xml ... encoding=...?>`` prolog, concatenating every match found.

    :param content: bytestring to extract encodings from.
    """
    # Bind the compiled patterns' findall methods once, then collect the
    # results of all three scans in declaration-priority order.
    find_charset = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I).findall
    find_pragma = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I).findall
    find_xml = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]').findall

    return find_charset(content) + find_pragma(content) + find_xml(content)
269 | 275 |
270 | 276 |
271 def get_encoding_from_headers(headers): | 277 def get_encoding_from_headers(headers): |
272 """Returns encodings from given HTTP Header Dict. | 278 """Returns encodings from given HTTP Header Dict. |
273 | 279 |
274 :param headers: dictionary to extract encoding from. | 280 :param headers: dictionary to extract encoding from. |
275 """ | 281 """ |
276 | 282 |
277 content_type = headers.get('content-type') | 283 content_type = headers.get('content-type') |
278 | 284 |
(...skipping 15 matching lines...) Expand all Loading... |
294 if r.encoding is None: | 300 if r.encoding is None: |
295 for item in iterator: | 301 for item in iterator: |
296 yield item | 302 yield item |
297 return | 303 return |
298 | 304 |
299 decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace') | 305 decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace') |
300 for chunk in iterator: | 306 for chunk in iterator: |
301 rv = decoder.decode(chunk) | 307 rv = decoder.decode(chunk) |
302 if rv: | 308 if rv: |
303 yield rv | 309 yield rv |
304 rv = decoder.decode('', final=True) | 310 rv = decoder.decode(b'', final=True) |
305 if rv: | 311 if rv: |
306 yield rv | 312 yield rv |
307 | 313 |
308 | 314 |
def iter_slices(string, slice_length):
    """Iterate over slices of a string.

    :param string: the string (or other sliceable sequence) to slice.
    :param slice_length: maximum length of each slice; ``None`` or a
        non-positive value yields the whole string as a single slice.
    """
    pos = 0
    # A step of zero or less would never advance ``pos`` and the loop
    # would spin forever; fall back to one slice covering everything.
    if slice_length is None or slice_length <= 0:
        slice_length = len(string)
    while pos < len(string):
        yield string[pos:pos + slice_length]
        pos += slice_length
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
354 | 360 |
355 | 361 |
def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.
    """
    pieces = uri.split('%')
    # pieces[0] precedes the first '%'; every later piece starts with the
    # two hex digits of a candidate escape sequence.
    for idx, piece in enumerate(pieces[1:], 1):
        hex_pair = piece[:2]
        if len(hex_pair) != 2 or not hex_pair.isalnum():
            # Not a well-formed escape: keep the '%' literally.
            pieces[idx] = '%' + piece
            continue
        try:
            char = chr(int(hex_pair, 16))
        except ValueError:
            # isalnum() admits non-hex letters (e.g. 'zz'); reject them.
            raise InvalidURL("Invalid percent-escape sequence: '%s'" % hex_pair)

        if char in UNRESERVED_SET:
            pieces[idx] = char + piece[2:]
        else:
            pieces[idx] = '%' + piece
    return ''.join(pieces)
372 | 382 |
373 | 383 |
def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.
    """
    # Unquote only the unreserved characters, then quote only illegal
    # characters — reserved characters, unreserved characters, and '%'
    # are all left alone.
    safe_chars = "!#$%&'()*+,/:;=?@[]~"
    return quote(unquote_unreserved(uri), safe=safe_chars)
384 | 394 |
385 | 395 |
def get_environ_proxies(url):
    """Return a dict of environment proxies."""
    # no_proxy may be spelled in either case in the environment.
    no_proxy = os.environ.get('no_proxy') or os.environ.get('NO_PROXY')
    netloc = urlparse(url).netloc

    if no_proxy:
        # See whether the netloc — with or without its port — matches the
        # end of any comma-separated entry in the no_proxy list.
        hosts = no_proxy.replace(' ', '').split(',')
        bare_host = netloc.split(':')[0]
        for host in hosts:
            if netloc.endswith(host) or bare_host.endswith(host):
                # The URL does match something in no_proxy, so we don't
                # want to apply the proxies on this URL.
                return {}

    # If the system proxy settings indicate that this URL should be
    # bypassed, don't proxy.
    if proxy_bypass(netloc):
        return {}

    # Neither no_proxy nor the system settings exclude this URL, so hand
    # back whatever proxies the environment defines.
    return getproxies()
419 | 426 |
420 | 427 |
421 def default_user_agent(): | 428 def default_user_agent(): |
422 """Return a string representing the default user agent.""" | 429 """Return a string representing the default user agent.""" |
423 _implementation = platform.python_implementation() | 430 _implementation = platform.python_implementation() |
424 | 431 |
425 if _implementation == 'CPython': | 432 if _implementation == 'CPython': |
426 _implementation_version = platform.python_version() | 433 _implementation_version = platform.python_version() |
427 elif _implementation == 'PyPy': | 434 elif _implementation == 'PyPy': |
428 _implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major, | 435 _implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major, |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
519 # Did not detect 2 valid UTF-16 ascii-range characters | 526 # Did not detect 2 valid UTF-16 ascii-range characters |
520 if nullcount == 3: | 527 if nullcount == 3: |
521 if sample[:3] == _null3: | 528 if sample[:3] == _null3: |
522 return 'utf-32-be' | 529 return 'utf-32-be' |
523 if sample[1:] == _null3: | 530 if sample[1:] == _null3: |
524 return 'utf-32-le' | 531 return 'utf-32-le' |
525 # Did not detect a valid UTF-32 ascii-range character | 532 # Did not detect a valid UTF-32 ascii-range character |
526 return None | 533 return None |
527 | 534 |
528 | 535 |
def except_on_missing_scheme(url):
    """Given a URL, raise a MissingSchema exception if the scheme is missing.
    """
    # Only the scheme component matters here; the remaining urlparse
    # fields are irrelevant to this check.
    if not urlparse(url).scheme:
        raise MissingSchema('Proxy URLs must have explicit schemes.')
541 | 543 |
542 | 544 |
def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple of
    username,password."""
    # An empty/None url carries no credentials.
    if not url:
        return ('', '')
    parsed = urlparse(url)
    return (parsed.username, parsed.password)
| 553 |
| 554 |
def to_native_string(string, encoding='ascii'):
    """
    Given a string object, regardless of type, returns a representation of that
    string in the native string type, encoding and decoding where necessary.
    This assumes ASCII unless told otherwise.
    """
    # Already the native str type: hand it back untouched.
    if isinstance(string, builtin_str):
        return string
    # On Python 2 the native type is bytes, so unicode must be encoded;
    # on Python 3 the native type is str, so bytes must be decoded.
    if is_py2:
        return string.encode(encoding)
    return string.decode(encoding)
OLD | NEW |