OLD | NEW |
(Empty) | |
| 1 # -*- coding: utf-8 -*- |
| 2 |
| 3 """ |
| 4 requests.utils |
| 5 ~~~~~~~~~~~~~~ |
| 6 |
| 7 This module provides utility functions that are used within Requests |
| 8 that are also useful for external consumption. |
| 9 """ |
| 10 |
| 11 import cgi |
| 12 import codecs |
| 13 import collections |
| 14 import io |
| 15 import os |
| 16 import re |
| 17 import socket |
| 18 import struct |
| 19 import warnings |
| 20 |
| 21 from . import __version__ |
| 22 from . import certs |
| 23 # to_native_string is unused here, but imported here for backwards compatibility |
| 24 from ._internal_utils import to_native_string |
| 25 from .compat import parse_http_list as _parse_list_header |
| 26 from .compat import ( |
| 27 quote, urlparse, bytes, str, OrderedDict, unquote, getproxies, |
| 28 proxy_bypass, urlunparse, basestring, integer_types) |
| 29 from .cookies import RequestsCookieJar, cookiejar_from_dict |
| 30 from .structures import CaseInsensitiveDict |
| 31 from .exceptions import ( |
| 32 InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError) |
| 33 |
| 34 _hush_pyflakes = (RequestsCookieJar,) |
| 35 |
| 36 NETRC_FILES = ('.netrc', '_netrc') |
| 37 |
| 38 DEFAULT_CA_BUNDLE_PATH = certs.where() |
| 39 |
| 40 |
| 41 def dict_to_sequence(d): |
| 42     """Returns the items of a dict-like object as a sequence, or the object unchanged if it has no items() method.""" |
| 43 |
| 44 if hasattr(d, 'items'): |
| 45 d = d.items() |
| 46 |
| 47 return d |
| 48 |
| 49 |
| 50 def super_len(o): |
| 51 total_length = None |
| 52 current_position = 0 |
| 53 |
| 54 if hasattr(o, '__len__'): |
| 55 total_length = len(o) |
| 56 |
| 57 elif hasattr(o, 'len'): |
| 58 total_length = o.len |
| 59 |
| 60 elif hasattr(o, 'fileno'): |
| 61 try: |
| 62 fileno = o.fileno() |
| 63 except io.UnsupportedOperation: |
| 64 pass |
| 65 else: |
| 66 total_length = os.fstat(fileno).st_size |
| 67 |
| 68 # Having used fstat to determine the file length, we need to |
| 69 # confirm that this file was opened up in binary mode. |
| 70 if 'b' not in o.mode: |
| 71 warnings.warn(( |
| 72 "Requests has determined the content-length for this " |
| 73 "request using the binary size of the file: however, the " |
| 74 "file has been opened in text mode (i.e. without the 'b' " |
| 75 "flag in the mode). This may lead to an incorrect " |
| 76 "content-length. In Requests 3.0, support will be removed " |
| 77 "for files in text mode."), |
| 78 FileModeWarning |
| 79 ) |
| 80 |
| 81 if hasattr(o, 'tell'): |
| 82 try: |
| 83 current_position = o.tell() |
| 84 except (OSError, IOError): |
| 85 # This can happen in some weird situations, such as when the file |
| 86 # is actually a special file descriptor like stdin. In this |
| 87 # instance, we don't know what the length is, so set it to zero and |
| 88 # let requests chunk it instead. |
| 89 if total_length is not None: |
| 90 current_position = total_length |
| 91 else: |
| 92 if hasattr(o, 'seek') and total_length is None: |
| 93                 # StringIO and BytesIO have seek but no usable fileno |
| 94 |
| 95 # seek to end of file |
| 96 o.seek(0, 2) |
| 97 total_length = o.tell() |
| 98 |
| 99 # seek back to current position to support |
| 100 # partially read file-like objects |
| 101 o.seek(current_position or 0) |
| 102 |
| 103 if total_length is None: |
| 104 total_length = 0 |
| 105 |
| 106 return max(0, total_length - current_position) |
| 107 |
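| # A minimal usage sketch: super_len reports the bytes remaining from the |
| # current read position, not the object's total size. |
| #   from io import BytesIO |
| #   buf = BytesIO(b'abcdef') |
| #   buf.read(2) |
| #   super_len(buf)  # -> 4 (six bytes total, two already consumed) |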
| 108 |
| 109 def get_netrc_auth(url, raise_errors=False): |
| 110 """Returns the Requests tuple auth for a given url from netrc.""" |
| 111 |
| 112 try: |
| 113 from netrc import netrc, NetrcParseError |
| 114 |
| 115 netrc_path = None |
| 116 |
| 117 for f in NETRC_FILES: |
| 118 try: |
| 119 loc = os.path.expanduser('~/{0}'.format(f)) |
| 120 except KeyError: |
| 121 # os.path.expanduser can fail when $HOME is undefined and |
| 122 # getpwuid fails. See http://bugs.python.org/issue20164 & |
| 123 # https://github.com/kennethreitz/requests/issues/1846 |
| 124 return |
| 125 |
| 126 if os.path.exists(loc): |
| 127 netrc_path = loc |
| 128 break |
| 129 |
| 130 # Abort early if there isn't one. |
| 131 if netrc_path is None: |
| 132 return |
| 133 |
| 134 ri = urlparse(url) |
| 135 |
| 136         # Strip port numbers from netloc. This weird `if...encode` dance is |
| 137 # used for Python 3.2, which doesn't support unicode literals. |
| 138 splitstr = b':' |
| 139 if isinstance(url, str): |
| 140 splitstr = splitstr.decode('ascii') |
| 141 host = ri.netloc.split(splitstr)[0] |
| 142 |
| 143 try: |
| 144 _netrc = netrc(netrc_path).authenticators(host) |
| 145 if _netrc: |
| 146 # Return with login / password |
| 147 login_i = (0 if _netrc[0] else 1) |
| 148 return (_netrc[login_i], _netrc[2]) |
| 149 except (NetrcParseError, IOError): |
| 150             # If there was a parsing error or a permissions issue reading the file, |
| 151             # we'll just skip netrc auth unless explicitly asked to raise errors. |
| 152 if raise_errors: |
| 153 raise |
| 154 |
| 155 # AppEngine hackiness. |
| 156 except (ImportError, AttributeError): |
| 157 pass |
| 158 |
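| # A hedged sketch: given a hypothetical ~/.netrc entry such as |
| #   machine example.com login alice password s3cret |
| # a lookup for that host would yield the auth tuple: |
| #   get_netrc_auth('https://example.com/data')  # -> ('alice', 's3cret') |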
| 159 |
| 160 def guess_filename(obj): |
| 161 """Tries to guess the filename of the given object.""" |
| 162 name = getattr(obj, 'name', None) |
| 163 if (name and isinstance(name, basestring) and name[0] != '<' and |
| 164 name[-1] != '>'): |
| 165 return os.path.basename(name) |
| 166 |
| 167 |
| 168 def from_key_val_list(value): |
| 169 """Take an object and test to see if it can be represented as a |
| 170     dictionary. If it can be, return an |
| 171     OrderedDict, e.g., |
| 172 |
| 173 :: |
| 174 |
| 175 >>> from_key_val_list([('key', 'val')]) |
| 176 OrderedDict([('key', 'val')]) |
| 177 >>> from_key_val_list('string') |
| 178 ValueError: need more than 1 value to unpack |
| 179 >>> from_key_val_list({'key': 'val'}) |
| 180 OrderedDict([('key', 'val')]) |
| 181 |
| 182 :rtype: OrderedDict |
| 183 """ |
| 184 if value is None: |
| 185 return None |
| 186 |
| 187 if isinstance(value, (str, bytes, bool, int)): |
| 188 raise ValueError('cannot encode objects that are not 2-tuples') |
| 189 |
| 190 return OrderedDict(value) |
| 191 |
| 192 |
| 193 def to_key_val_list(value): |
| 194 """Take an object and test to see if it can be represented as a |
| 195 dictionary. If it can be, return a list of tuples, e.g., |
| 196 |
| 197 :: |
| 198 |
| 199 >>> to_key_val_list([('key', 'val')]) |
| 200 [('key', 'val')] |
| 201 >>> to_key_val_list({'key': 'val'}) |
| 202 [('key', 'val')] |
| 203 >>> to_key_val_list('string') |
| 204 ValueError: cannot encode objects that are not 2-tuples. |
| 205 |
| 206 :rtype: list |
| 207 """ |
| 208 if value is None: |
| 209 return None |
| 210 |
| 211 if isinstance(value, (str, bytes, bool, int)): |
| 212 raise ValueError('cannot encode objects that are not 2-tuples') |
| 213 |
| 214 if isinstance(value, collections.Mapping): |
| 215 value = value.items() |
| 216 |
| 217 return list(value) |
| 218 |
| 219 |
| 220 # From mitsuhiko/werkzeug (used with permission). |
| 221 def parse_list_header(value): |
| 222 """Parse lists as described by RFC 2068 Section 2. |
| 223 |
| 224 In particular, parse comma-separated lists where the elements of |
| 225 the list may include quoted-strings. A quoted-string could |
| 226 contain a comma. A non-quoted string could have quotes in the |
| 227 middle. Quotes are removed automatically after parsing. |
| 228 |
| 229 It basically works like :func:`parse_set_header` just that items |
| 230 may appear multiple times and case sensitivity is preserved. |
| 231 |
| 232 The return value is a standard :class:`list`: |
| 233 |
| 234 >>> parse_list_header('token, "quoted value"') |
| 235 ['token', 'quoted value'] |
| 236 |
| 237 To create a header from the :class:`list` again, use the |
| 238 :func:`dump_header` function. |
| 239 |
| 240 :param value: a string with a list header. |
| 241 :return: :class:`list` |
| 242 :rtype: list |
| 243 """ |
| 244 result = [] |
| 245 for item in _parse_list_header(value): |
| 246 if item[:1] == item[-1:] == '"': |
| 247 item = unquote_header_value(item[1:-1]) |
| 248 result.append(item) |
| 249 return result |
| 250 |
| 251 |
| 252 # From mitsuhiko/werkzeug (used with permission). |
| 253 def parse_dict_header(value): |
| 254 """Parse lists of key, value pairs as described by RFC 2068 Section 2 and |
| 255 convert them into a python dict: |
| 256 |
| 257 >>> d = parse_dict_header('foo="is a fish", bar="as well"') |
| 258 >>> type(d) is dict |
| 259 True |
| 260 >>> sorted(d.items()) |
| 261 [('bar', 'as well'), ('foo', 'is a fish')] |
| 262 |
| 263 If there is no value for a key it will be `None`: |
| 264 |
| 265 >>> parse_dict_header('key_without_value') |
| 266 {'key_without_value': None} |
| 267 |
| 268 To create a header from the :class:`dict` again, use the |
| 269 :func:`dump_header` function. |
| 270 |
| 271 :param value: a string with a dict header. |
| 272 :return: :class:`dict` |
| 273 :rtype: dict |
| 274 """ |
| 275 result = {} |
| 276 for item in _parse_list_header(value): |
| 277 if '=' not in item: |
| 278 result[item] = None |
| 279 continue |
| 280 name, value = item.split('=', 1) |
| 281 if value[:1] == value[-1:] == '"': |
| 282 value = unquote_header_value(value[1:-1]) |
| 283 result[name] = value |
| 284 return result |
| 285 |
| 286 |
| 287 # From mitsuhiko/werkzeug (used with permission). |
| 288 def unquote_header_value(value, is_filename=False): |
| 289 r"""Unquotes a header value. (Reversal of :func:`quote_header_value`). |
| 290 This does not use the real unquoting but what browsers are actually |
| 291 using for quoting. |
| 292 |
| 293 :param value: the header value to unquote. |
| 294 :rtype: str |
| 295 """ |
| 296 if value and value[0] == value[-1] == '"': |
| 297 # this is not the real unquoting, but fixing this so that the |
| 298 # RFC is met will result in bugs with internet explorer and |
| 299 # probably some other browsers as well. IE for example is |
| 300 # uploading files with "C:\foo\bar.txt" as filename |
| 301 value = value[1:-1] |
| 302 |
| 303 # if this is a filename and the starting characters look like |
| 304 # a UNC path, then just return the value without quotes. Using the |
| 305 # replace sequence below on a UNC path has the effect of turning |
| 306 # the leading double slash into a single slash and then |
| 307 # _fix_ie_filename() doesn't work correctly. See #458. |
| 308 if not is_filename or value[:2] != '\\\\': |
| 309 return value.replace('\\\\', '\\').replace('\\"', '"') |
| 310 return value |
| 311 |
| 312 |
| 313 def dict_from_cookiejar(cj): |
| 314 """Returns a key/value dictionary from a CookieJar. |
| 315 |
| 316 :param cj: CookieJar object to extract cookies from. |
| 317 :rtype: dict |
| 318 """ |
| 319 |
| 320 cookie_dict = {} |
| 321 |
| 322 for cookie in cj: |
| 323 cookie_dict[cookie.name] = cookie.value |
| 324 |
| 325 return cookie_dict |
| 326 |
| 327 |
| 328 def add_dict_to_cookiejar(cj, cookie_dict): |
| 329 """Returns a CookieJar from a key/value dictionary. |
| 330 |
| 331 :param cj: CookieJar to insert cookies into. |
| 332 :param cookie_dict: Dict of key/values to insert into CookieJar. |
| 333 :rtype: CookieJar |
| 334 """ |
| 335 |
| 336 return cookiejar_from_dict(cookie_dict, cj) |
| 337 |
| 338 |
| 339 def get_encodings_from_content(content): |
| 340 """Returns encodings from given content string. |
| 341 |
| 342 :param content: bytestring to extract encodings from. |
| 343 """ |
| 344 warnings.warn(( |
| 345 'In requests 3.0, get_encodings_from_content will be removed. For ' |
| 346 'more information, please see the discussion on issue #2266. (This' |
| 347 ' warning should only appear once.)'), |
| 348 DeprecationWarning) |
| 349 |
| 350 charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I) |
| 351     pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I) |
| 352 xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]') |
| 353 |
| 354 return (charset_re.findall(content) + |
| 355 pragma_re.findall(content) + |
| 356 xml_re.findall(content)) |
| 357 |
| 358 |
| 359 def get_encoding_from_headers(headers): |
| 360 """Returns encodings from given HTTP Header Dict. |
| 361 |
| 362 :param headers: dictionary to extract encoding from. |
| 363 :rtype: str |
| 364 """ |
| 365 |
| 366 content_type = headers.get('content-type') |
| 367 |
| 368 if not content_type: |
| 369 return None |
| 370 |
| 371 content_type, params = cgi.parse_header(content_type) |
| 372 |
| 373 if 'charset' in params: |
| 374 return params['charset'].strip("'\"") |
| 375 |
| 376 if 'text' in content_type: |
| 377 return 'ISO-8859-1' |
| 378 |
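| # A minimal sketch of the two branches above: |
| #   get_encoding_from_headers({'content-type': 'text/html; charset=utf-8'}) |
| #   # -> 'utf-8' (explicit charset parameter) |
| #   get_encoding_from_headers({'content-type': 'text/plain'}) |
| #   # -> 'ISO-8859-1' (RFC 2616 default for text/* with no charset) |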
| 379 |
| 380 def stream_decode_response_unicode(iterator, r): |
| 381     """Stream decodes an iterator.""" |
| 382 |
| 383 if r.encoding is None: |
| 384 for item in iterator: |
| 385 yield item |
| 386 return |
| 387 |
| 388 decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace') |
| 389 for chunk in iterator: |
| 390 rv = decoder.decode(chunk) |
| 391 if rv: |
| 392 yield rv |
| 393 rv = decoder.decode(b'', final=True) |
| 394 if rv: |
| 395 yield rv |
| 396 |
| 397 |
| 398 def iter_slices(string, slice_length): |
| 399 """Iterate over slices of a string.""" |
| 400 pos = 0 |
| 401 if slice_length is None or slice_length <= 0: |
| 402 slice_length = len(string) |
| 403 while pos < len(string): |
| 404 yield string[pos:pos + slice_length] |
| 405 pos += slice_length |
| 406 |
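| # A minimal sketch: |
| #   list(iter_slices('abcdef', 2))     # -> ['ab', 'cd', 'ef'] |
| #   list(iter_slices('abcdef', None))  # -> ['abcdef'] (one slice) |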
| 407 |
| 408 def get_unicode_from_response(r): |
| 409 """Returns the requested content back in unicode. |
| 410 |
| 411 :param r: Response object to get unicode content from. |
| 412 |
| 413 Tried: |
| 414 |
| 415 1. charset from content-type |
| 416     2. fall back to decoding, replacing undecodable characters |
| 417 |
| 418 :rtype: str |
| 419 """ |
| 420 warnings.warn(( |
| 421 'In requests 3.0, get_unicode_from_response will be removed. For ' |
| 422 'more information, please see the discussion on issue #2266. (This' |
| 423 ' warning should only appear once.)'), |
| 424 DeprecationWarning) |
| 425 |
| 426 tried_encodings = [] |
| 427 |
| 428 # Try charset from content-type |
| 429 encoding = get_encoding_from_headers(r.headers) |
| 430 |
| 431 if encoding: |
| 432 try: |
| 433 return str(r.content, encoding) |
| 434 except UnicodeError: |
| 435 tried_encodings.append(encoding) |
| 436 |
| 437 # Fall back: |
| 438 try: |
| 439 return str(r.content, encoding, errors='replace') |
| 440 except TypeError: |
| 441 return r.content |
| 442 |
| 443 |
| 444 # The unreserved URI characters (RFC 3986) |
| 445 UNRESERVED_SET = frozenset( |
| 446 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" |
| 447 + "0123456789-._~") |
| 448 |
| 449 |
| 450 def unquote_unreserved(uri): |
| 451 """Un-escape any percent-escape sequences in a URI that are unreserved |
| 452 characters. This leaves all reserved, illegal and non-ASCII bytes encoded. |
| 453 |
| 454 :rtype: str |
| 455 """ |
| 456 parts = uri.split('%') |
| 457 for i in range(1, len(parts)): |
| 458 h = parts[i][0:2] |
| 459 if len(h) == 2 and h.isalnum(): |
| 460 try: |
| 461 c = chr(int(h, 16)) |
| 462 except ValueError: |
| 463 raise InvalidURL("Invalid percent-escape sequence: '%s'" % h) |
| 464 |
| 465 if c in UNRESERVED_SET: |
| 466 parts[i] = c + parts[i][2:] |
| 467 else: |
| 468 parts[i] = '%' + parts[i] |
| 469 else: |
| 470 parts[i] = '%' + parts[i] |
| 471 return ''.join(parts) |
| 472 |
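| # A minimal sketch: unreserved escapes are decoded, reserved ones kept. |
| #   unquote_unreserved('http://example.com/%7Euser%2Fdocs') |
| #   # -> 'http://example.com/~user%2Fdocs' (%7E is '~', unreserved; |
| #   #    %2F is '/', reserved, so it stays encoded) |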
| 473 |
| 474 def requote_uri(uri): |
| 475 """Re-quote the given URI. |
| 476 |
| 477 This function passes the given URI through an unquote/quote cycle to |
| 478 ensure that it is fully and consistently quoted. |
| 479 |
| 480 :rtype: str |
| 481 """ |
| 482 safe_with_percent = "!#$%&'()*+,/:;=?@[]~" |
| 483 safe_without_percent = "!#$&'()*+,/:;=?@[]~" |
| 484 try: |
| 485 # Unquote only the unreserved characters |
| 486 # Then quote only illegal characters (do not quote reserved, |
| 487 # unreserved, or '%') |
| 488 return quote(unquote_unreserved(uri), safe=safe_with_percent) |
| 489 except InvalidURL: |
| 490 # We couldn't unquote the given URI, so let's try quoting it, but |
| 491 # there may be unquoted '%'s in the URI. We need to make sure they're |
| 492 # properly quoted so they do not cause issues elsewhere. |
| 493 return quote(uri, safe=safe_without_percent) |
| 494 |
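| # A minimal sketch: unsafe characters get quoted, while existing valid |
| # escapes survive the unquote/quote cycle without double-encoding. |
| #   requote_uri('http://example.com/a b')    # -> 'http://example.com/a%20b' |
| #   requote_uri('http://example.com/a%20b')  # -> 'http://example.com/a%20b' |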
| 495 |
| 496 def address_in_network(ip, net): |
| 497     """This function allows you to check if an IP address belongs to a network subnet |
| 498 |
| 499 Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24 |
| 500 returns False if ip = 192.168.1.1 and net = 192.168.100.0/24 |
| 501 |
| 502 :rtype: bool |
| 503 """ |
| 504 ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0] |
| 505 netaddr, bits = net.split('/') |
| 506     netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0] |
| 507 network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask |
| 508 return (ipaddr & netmask) == (network & netmask) |
| 509 |
| 510 |
| 511 def dotted_netmask(mask): |
| 512 """Converts mask from /xx format to xxx.xxx.xxx.xxx |
| 513 |
| 514 Example: if mask is 24 function returns 255.255.255.0 |
| 515 |
| 516 :rtype: str |
| 517 """ |
| 518 bits = 0xffffffff ^ (1 << 32 - mask) - 1 |
| 519 return socket.inet_ntoa(struct.pack('>I', bits)) |
| 520 |
| 521 |
| 522 def is_ipv4_address(string_ip): |
| 523 """ |
| 524 :rtype: bool |
| 525 """ |
| 526 try: |
| 527 socket.inet_aton(string_ip) |
| 528 except socket.error: |
| 529 return False |
| 530 return True |
| 531 |
| 532 |
| 533 def is_valid_cidr(string_network): |
| 534 """ |
| 535 Very simple check of the cidr format in no_proxy variable. |
| 536 |
| 537 :rtype: bool |
| 538 """ |
| 539 if string_network.count('/') == 1: |
| 540 try: |
| 541 mask = int(string_network.split('/')[1]) |
| 542 except ValueError: |
| 543 return False |
| 544 |
| 545 if mask < 1 or mask > 32: |
| 546 return False |
| 547 |
| 548 try: |
| 549 socket.inet_aton(string_network.split('/')[0]) |
| 550 except socket.error: |
| 551 return False |
| 552 else: |
| 553 return False |
| 554 return True |
| 555 |
| 556 |
| 557 def should_bypass_proxies(url): |
| 558 """ |
| 559 Returns whether we should bypass proxies or not. |
| 560 |
| 561 :rtype: bool |
| 562 """ |
| 563 get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper()) |
| 564 |
| 565 # First check whether no_proxy is defined. If it is, check that the URL |
| 566 # we're getting isn't in the no_proxy list. |
| 567 no_proxy = get_proxy('no_proxy') |
| 568 netloc = urlparse(url).netloc |
| 569 |
| 570 if no_proxy: |
| 571 # We need to check whether we match here. We need to see if we match |
| 572 # the end of the netloc, both with and without the port. |
| 573 no_proxy = ( |
| 574 host for host in no_proxy.replace(' ', '').split(',') if host |
| 575 ) |
| 576 |
| 577 ip = netloc.split(':')[0] |
| 578 if is_ipv4_address(ip): |
| 579 for proxy_ip in no_proxy: |
| 580 if is_valid_cidr(proxy_ip): |
| 581 if address_in_network(ip, proxy_ip): |
| 582 return True |
| 583 elif ip == proxy_ip: |
| 584                     # If no_proxy ip was defined in plain IP notation instead of CIDR notation |
| 585                     # and matches the IP of the URL's host |
| 586 return True |
| 587 else: |
| 588 for host in no_proxy: |
| 589 if netloc.endswith(host) or netloc.split(':')[0].endswith(host): |
| 590 # The URL does match something in no_proxy, so we don't want |
| 591 # to apply the proxies on this URL. |
| 592 return True |
| 593 |
| 594 # If the system proxy settings indicate that this URL should be bypassed, |
| 595 # don't proxy. |
| 596 # The proxy_bypass function is incredibly buggy on OS X in early versions |
| 597 # of Python 2.6, so allow this call to fail. Only catch the specific |
| 598 # exceptions we've seen, though: this call failing in other ways can reveal |
| 599 # legitimate problems. |
| 600 try: |
| 601 bypass = proxy_bypass(netloc) |
| 602 except (TypeError, socket.gaierror): |
| 603 bypass = False |
| 604 |
| 605 if bypass: |
| 606 return True |
| 607 |
| 608 return False |
| 609 |
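| # A minimal sketch, assuming a no_proxy environment variable is set: |
| #   os.environ['no_proxy'] = 'localhost,169.254.0.0/16' |
| #   should_bypass_proxies('http://localhost:5000/')  # -> True (host match) |
| #   should_bypass_proxies('http://169.254.1.2/')     # -> True (CIDR match) |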
| 610 |
| 611 def get_environ_proxies(url): |
| 612 """ |
| 613 Return a dict of environment proxies. |
| 614 |
| 615 :rtype: dict |
| 616 """ |
| 617 if should_bypass_proxies(url): |
| 618 return {} |
| 619 else: |
| 620 return getproxies() |
| 621 |
| 622 |
| 623 def select_proxy(url, proxies): |
| 624 """Select a proxy for the url, if applicable. |
| 625 |
| 626     :param url: The URL being requested |
| 627 :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs |
| 628 """ |
| 629 proxies = proxies or {} |
| 630 urlparts = urlparse(url) |
| 631 if urlparts.hostname is None: |
| 632 return proxies.get(urlparts.scheme, proxies.get('all')) |
| 633 |
| 634 proxy_keys = [ |
| 635 urlparts.scheme + '://' + urlparts.hostname, |
| 636 urlparts.scheme, |
| 637 'all://' + urlparts.hostname, |
| 638 'all', |
| 639 ] |
| 640 proxy = None |
| 641 for proxy_key in proxy_keys: |
| 642 if proxy_key in proxies: |
| 643 proxy = proxies[proxy_key] |
| 644 break |
| 645 |
| 646 return proxy |
| 647 |
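| # A minimal sketch of the precedence order (scheme+host beats bare scheme, |
| # which beats the 'all' fallbacks): |
| #   proxies = {'http': 'http://10.0.0.1:3128', 'all': 'http://10.0.0.2:8080'} |
| #   select_proxy('http://example.com/', proxies)  # -> 'http://10.0.0.1:3128' |
| #   select_proxy('ftp://example.com/', proxies)   # -> 'http://10.0.0.2:8080' |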
| 648 |
| 649 def default_user_agent(name="python-requests"): |
| 650 """ |
| 651 Return a string representing the default user agent. |
| 652 |
| 653 :rtype: str |
| 654 """ |
| 655 return '%s/%s' % (name, __version__) |
| 656 |
| 657 |
| 658 def default_headers(): |
| 659 """ |
| 660 :rtype: requests.structures.CaseInsensitiveDict |
| 661 """ |
| 662 return CaseInsensitiveDict({ |
| 663 'User-Agent': default_user_agent(), |
| 664 'Accept-Encoding': ', '.join(('gzip', 'deflate')), |
| 665 'Accept': '*/*', |
| 666 'Connection': 'keep-alive', |
| 667 }) |
| 668 |
| 669 |
| 670 def parse_header_links(value): |
| 671     """Return a list of parsed link headers. |
| 672 |
| 673     i.e. Link: <http://.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg" |
| 674 |
| 675 :rtype: list |
| 676 """ |
| 677 |
| 678 links = [] |
| 679 |
| 680 replace_chars = ' \'"' |
| 681 |
| 682 for val in re.split(', *<', value): |
| 683 try: |
| 684 url, params = val.split(';', 1) |
| 685 except ValueError: |
| 686 url, params = val, '' |
| 687 |
| 688 link = {'url': url.strip('<> \'"')} |
| 689 |
| 690 for param in params.split(';'): |
| 691 try: |
| 692 key, value = param.split('=') |
| 693 except ValueError: |
| 694 break |
| 695 |
| 696 link[key.strip(replace_chars)] = value.strip(replace_chars) |
| 697 |
| 698 links.append(link) |
| 699 |
| 700 return links |
| 701 |
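| # A minimal sketch with a typical pagination header: |
| #   parse_header_links('<http://example.com/?page=2>; rel="next"') |
| #   # -> [{'url': 'http://example.com/?page=2', 'rel': 'next'}] |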
| 702 |
| 703 # Null bytes; no need to recreate these on each call to guess_json_utf |
| 704 _null = '\x00'.encode('ascii') # encoding to ASCII for Python 3 |
| 705 _null2 = _null * 2 |
| 706 _null3 = _null * 3 |
| 707 |
| 708 |
| 709 def guess_json_utf(data): |
| 710     """Guess the UTF encoding of JSON bytes from a BOM or its null-byte pattern. |
| 711     :rtype: str |
| 712 """ |
| 713 # JSON always starts with two ASCII characters, so detection is as |
| 714 # easy as counting the nulls and from their location and count |
| 715 # determine the encoding. Also detect a BOM, if present. |
| 716 sample = data[:4] |
| 717 if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE): |
| 718 return 'utf-32' # BOM included |
| 719 if sample[:3] == codecs.BOM_UTF8: |
| 720 return 'utf-8-sig' # BOM included, MS style (discouraged) |
| 721 if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE): |
| 722 return 'utf-16' # BOM included |
| 723 nullcount = sample.count(_null) |
| 724 if nullcount == 0: |
| 725 return 'utf-8' |
| 726 if nullcount == 2: |
| 727 if sample[::2] == _null2: # 1st and 3rd are null |
| 728 return 'utf-16-be' |
| 729 if sample[1::2] == _null2: # 2nd and 4th are null |
| 730 return 'utf-16-le' |
| 731 # Did not detect 2 valid UTF-16 ascii-range characters |
| 732 if nullcount == 3: |
| 733 if sample[:3] == _null3: |
| 734 return 'utf-32-be' |
| 735 if sample[1:] == _null3: |
| 736 return 'utf-32-le' |
| 737 # Did not detect a valid UTF-32 ascii-range character |
| 738 return None |
| 739 |
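| # A minimal sketch: |
| #   guess_json_utf(b'{"a": 1}')            # -> 'utf-8' (no null bytes) |
| #   guess_json_utf('{}'.encode('utf-16'))  # -> 'utf-16' (BOM detected) |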
| 740 |
| 741 def prepend_scheme_if_needed(url, new_scheme): |
| 742 """Given a URL that may or may not have a scheme, prepend the given scheme. |
| 743 Does not replace a present scheme with the one provided as an argument. |
| 744 |
| 745 :rtype: str |
| 746 """ |
| 747 scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme) |
| 748 |
| 749 # urlparse is a finicky beast, and sometimes decides that there isn't a |
| 750 # netloc present. Assume that it's being over-cautious, and switch netloc |
| 751 # and path if urlparse decided there was no netloc. |
| 752 if not netloc: |
| 753 netloc, path = path, netloc |
| 754 |
| 755 return urlunparse((scheme, netloc, path, params, query, fragment)) |
| 756 |
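| # A minimal sketch: |
| #   prepend_scheme_if_needed('example.com/pub', 'http') |
| #   # -> 'http://example.com/pub' |
| #   prepend_scheme_if_needed('https://example.com', 'http') |
| #   # -> 'https://example.com' (existing scheme is kept) |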
| 757 |
| 758 def get_auth_from_url(url): |
| 759 """Given a url with authentication components, extract them into a tuple of |
| 760 username,password. |
| 761 |
| 762 :rtype: (str,str) |
| 763 """ |
| 764 parsed = urlparse(url) |
| 765 |
| 766 try: |
| 767 auth = (unquote(parsed.username), unquote(parsed.password)) |
| 768 except (AttributeError, TypeError): |
| 769 auth = ('', '') |
| 770 |
| 771 return auth |
| 772 |
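| # A minimal sketch; percent-escapes in the credentials are unquoted: |
| #   get_auth_from_url('http://user:p%40ss@example.com/')  # -> ('user', 'p@ss') |
| #   get_auth_from_url('http://example.com/')              # -> ('', '') |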
| 773 |
| 774 # Moved outside of function to avoid recompile every call |
| 775 _CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$') |
| 776 _CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$') |
| 777 |
| 778 def check_header_validity(header): |
| 779 """Verifies that header value is a string which doesn't contain |
| 780 leading whitespace or return characters. This prevents unintended |
| 781 header injection. |
| 782 |
| 783 :param header: tuple, in the format (name, value). |
| 784 """ |
| 785 name, value = header |
| 786 |
| 787 if isinstance(value, bytes): |
| 788 pat = _CLEAN_HEADER_REGEX_BYTE |
| 789 else: |
| 790 pat = _CLEAN_HEADER_REGEX_STR |
| 791 try: |
| 792 if not pat.match(value): |
| 793             raise InvalidHeader("Invalid return character or leading space in header: %s" % name) |
| 794 except TypeError: |
| 795 raise InvalidHeader("Header value %s must be of type str or bytes, " |
| 796 "not %s" % (value, type(value))) |
| 797 |
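| # A minimal sketch: a well-formed header passes silently, while return |
| # characters or leading whitespace raise InvalidHeader. |
| #   check_header_validity(('Accept', 'application/json'))      # OK, returns None |
| #   check_header_validity(('X-Key', 'a\r\nX-Evil: injected'))  # raises InvalidHeader |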
| 798 |
| 799 def urldefragauth(url): |
| 800 """ |
| 801 Given a url remove the fragment and the authentication part. |
| 802 |
| 803 :rtype: str |
| 804 """ |
| 805 scheme, netloc, path, params, query, fragment = urlparse(url) |
| 806 |
| 807 # see func:`prepend_scheme_if_needed` |
| 808 if not netloc: |
| 809 netloc, path = path, netloc |
| 810 |
| 811 netloc = netloc.rsplit('@', 1)[-1] |
| 812 |
| 813 return urlunparse((scheme, netloc, path, params, query, '')) |
| 814 |
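| # A minimal sketch: |
| #   urldefragauth('http://user:pass@example.com/path#section') |
| #   # -> 'http://example.com/path' |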
| 815 def rewind_body(prepared_request): |
| 816 """Move file pointer back to its recorded starting position |
| 817 so it can be read again on redirect. |
| 818 """ |
| 819 body_seek = getattr(prepared_request.body, 'seek', None) |
| 820     if body_seek is not None and isinstance(prepared_request._body_position, integer_types): |
| 821 try: |
| 822 body_seek(prepared_request._body_position) |
| 823 except (IOError, OSError): |
| 824             raise UnrewindableBodyError("An error occurred when rewinding request " |
| 825                                         "body for redirect.") |
| 826 else: |
| 827         raise UnrewindableBodyError("Unable to rewind request body for redirect.") |