OLD | NEW |
| (Empty) |
1 # urllib3/util.py | |
2 # Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) | |
3 # | |
4 # This module is part of urllib3 and is released under | |
5 # the MIT License: http://www.opensource.org/licenses/mit-license.php | |
6 | |
7 | |
8 from base64 import b64encode | |
9 from binascii import hexlify, unhexlify | |
10 from collections import namedtuple | |
11 from hashlib import md5, sha1 | |
12 from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT | |
13 import time | |
14 | |
15 try: | |
16 from select import poll, POLLIN | |
17 except ImportError: # `poll` doesn't exist on OSX and other platforms | |
18 poll = False | |
19 try: | |
20 from select import select | |
21 except ImportError: # `select` doesn't exist on AppEngine. | |
22 select = False | |
23 | |
24 try: # Test for SSL features | |
25 SSLContext = None | |
26 HAS_SNI = False | |
27 | |
28 import ssl | |
29 from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 | |
30 from ssl import SSLContext # Modern SSL? | |
31 from ssl import HAS_SNI # Has SNI? | |
32 except ImportError: | |
33 pass | |
34 | |
35 from .packages import six | |
36 from .exceptions import LocationParseError, SSLError, TimeoutStateError | |
37 | |
38 | |
39 _Default = object() | |
40 # The default timeout to use for socket connections. This is the attribute used | |
41 # by httplib to define the default timeout | |
42 | |
43 | |
def current_time():
    """
    Return the current time as reported by :func:`time.time`.

    Kept as a separate function so that unit tests can mock it out.
    """
    now = time.time()
    return now
49 | |
50 | |
class Timeout(object):
    """
    Holder for the connect, read and total timeout settings of a request.

    Example usage:

    .. code-block:: python

        timeout = urllib3.util.Timeout(connect=2.0, read=7.0)
        pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout)
        pool.request(...) # Etc, etc

    :param connect:
        Longest time to wait for a connection attempt to a server to
        succeed. When omitted, the connect timeout falls back to the system
        default, probably `the global default timeout in socket.py
        <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
        ``None`` means connection attempts never time out.

    :type connect: integer, float, or None

    :param read:
        Longest time to wait between consecutive read operations for a
        response from the server. When omitted, the read timeout falls back
        to the system default, probably `the global default timeout in
        socket.py
        <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
        ``None`` means reads never time out.

    :type read: integer, float, or None

    :param total:
        Longest time to wait for an HTTP request to connect and return,
        i.e. a combined budget for connect and read. When a total is given
        together with a connect or a read timeout, the shorter of the two
        values is applied.

        Defaults to None.

    :type total: integer, float, or None

    .. note::

        Many factors can affect the total amount of time for urllib3 to
        return an HTTP response. Specifically, Python's DNS resolver does not
        obey the timeout specified on the socket. Other factors that can
        affect total request time include high CPU load, high swap, the
        program running at a low priority level, or other behaviors. The
        observed running time for urllib3 to return a response may be greater
        than the value passed to `total`.

        In addition, the read and total timeouts only measure the time
        between read operations on the socket connecting the client and the
        server, not the total amount of time for the request to return a
        complete response. As an example, you may want a request to return
        within 7 seconds or fail, so you set the ``total`` timeout to 7
        seconds. If the server sends one byte to you every 5 seconds, the
        request will **not** trigger time out. This case is admittedly rare.
    """

    #: A sentinel object representing the default timeout value
    DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT

    def __init__(self, connect=_Default, read=_Default, total=None):
        # Validation order (connect, read, total) decides which error a
        # caller sees first when several values are invalid.
        self._connect = self._validate_timeout(connect, 'connect')
        self._read = self._validate_timeout(read, 'read')
        self.total = self._validate_timeout(total, 'total')
        # Set by start_connect(); None means the clock has not been started.
        self._start_connect = None

    def __str__(self):
        cls_name = type(self).__name__
        fields = (cls_name, self._connect, self._read, self.total)
        return '%s(connect=%r, read=%r, total=%r)' % fields

    @classmethod
    def _validate_timeout(cls, value, name):
        """ Validate a single timeout setting.

        :param value: The timeout value to validate
        :param name: Name of the setting being validated; only used to build
            the error message
        :return: ``value`` unchanged, or :attr:`DEFAULT_TIMEOUT` when the
            argument was omitted
        :raises ValueError: if the value cannot be interpreted as a number,
            or if it is a numeric value less than zero
        """
        if value is _Default:
            return cls.DEFAULT_TIMEOUT

        # None (no timeout) and the socket sentinel are passed through as-is.
        if value is None or value is cls.DEFAULT_TIMEOUT:
            return value

        try:
            float(value)
        except (TypeError, ValueError):
            raise ValueError("Timeout value %s was %s, but it must be an "
                             "int or float." % (name, value))

        try:
            is_negative = bool(value < 0)
        except TypeError:  # Python 3 refuses to order unrelated types
            raise ValueError("Timeout value %s was %s, but it must be an "
                             "int or float." % (name, value))

        if is_negative:
            raise ValueError("Attempted to set %s timeout to %s, but the "
                             "timeout cannot be set to a value less "
                             "than 0." % (name, value))

        return value

    @classmethod
    def from_float(cls, timeout):
        """ Build a Timeout from a legacy, single-value timeout.

        httplib.py applies one timeout to both the connect() and recv()
        socket calls. This returns a :class:`Timeout` whose connect and read
        timeouts are both set to ``timeout``.

        :param timeout: The legacy timeout value
        :type timeout: integer, float, sentinel default object, or None
        :return: a Timeout object
        :rtype: :class:`Timeout`
        """
        return Timeout(connect=timeout, read=timeout)

    def clone(self):
        """ Return a fresh Timeout carrying the same settings.

        Timeout settings are stored per-pool, but every request needs its own
        Timeout object so that each one has its own start/stop state.

        :return: a copy of the timeout object
        :rtype: :class:`Timeout`
        """
        # copy.deepcopy is not usable here: it would also create a new object
        # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to
        # detect the user default.
        settings = dict(connect=self._connect, read=self._read,
                        total=self.total)
        return Timeout(**settings)

    def start_connect(self):
        """ Start the timeout clock, used during a connect() attempt

        :return: the time at which the clock was started
        :raises urllib3.exceptions.TimeoutStateError: if you attempt
            to start a timer that has been started already.
        """
        if self._start_connect is not None:
            raise TimeoutStateError("Timeout timer has already been started.")
        started_at = current_time()
        self._start_connect = started_at
        return started_at

    def get_connect_duration(self):
        """ Return the time elapsed since the call to :meth:`start_connect`.

        :return: the elapsed time
        :rtype: float
        :raises urllib3.exceptions.TimeoutStateError: if you attempt
            to get duration for a timer that hasn't been started.
        """
        started_at = self._start_connect
        if started_at is None:
            raise TimeoutStateError("Can't get connect duration for timer "
                                    "that has not started.")
        return current_time() - started_at

    @property
    def connect_timeout(self):
        """ Value to use when setting a connection timeout.

        This is the connect setting, capped by ``total`` when a total is
        configured: a number, the value None (never timeout), or the default
        system timeout.

        :return: the connect timeout
        :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
        """
        total = self.total
        connect = self._connect

        if total is None:
            return connect

        # No explicit connect limit: the total budget is the only bound.
        if connect is None or connect is self.DEFAULT_TIMEOUT:
            return total

        return min(connect, total)

    @property
    def read_timeout(self):
        """ Value to use for the read timeout.

        When ``total`` is set, the time already spent connecting is
        subtracted from it, so the result depends on the connect clock.

        If ``total`` is set but no explicit read timeout is, and the connect
        clock has not been started, a
        :exc:`~urllib3.exceptions.TimeoutStateError` is raised. If both are
        set and the clock has not been started, the plain read setting is
        returned.

        :return: the value to use for the read timeout
        :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
        :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
            has not yet been called on this object.
        """
        total = self.total
        if total is None or total is self.DEFAULT_TIMEOUT:
            # No total budget configured: the read setting applies unchanged.
            return self._read

        read = self._read
        if read is None or read is self.DEFAULT_TIMEOUT:
            # Only a total budget: whatever is left of it after connecting.
            return max(0, total - self.get_connect_duration())

        # in case the connect timeout has not yet been established.
        if self._start_connect is None:
            return read

        return max(0, min(total - self.get_connect_duration(), read))
263 | |
264 | |
class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query',
                             'fragment'])):
    """
    Datastructure for representing an HTTP URL. Used as a return value for
    :func:`parse_url`.

    Every field defaults to ``None`` so a partial URL can be built by
    passing only the known components.
    """
    # Must be spelled ``__slots__``: a plain ``slots`` attribute has no
    # special meaning, so instances would still get a per-instance __dict__.
    __slots__ = ()

    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
                query=None, fragment=None):
        return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
                                       query, fragment)

    @property
    def hostname(self):
        """For backwards-compatibility with urlparse. We're nice like that."""
        return self.host

    @property
    def request_uri(self):
        """Absolute path including the query string.

        An empty or missing path is rendered as ``'/'``; the query is
        appended after a ``'?'`` whenever it is not ``None``.
        """
        uri = self.path or '/'

        if self.query is not None:
            uri += '?' + self.query

        return uri

    @property
    def netloc(self):
        """Network location including host and port.

        Returns ``'host:port'`` when a truthy port is set, otherwise the
        host alone.
        """
        if self.port:
            return '%s:%d' % (self.host, self.port)
        return self.host
296 | |
297 | |
def split_first(s, delims):
    """
    Split ``s`` at whichever of the given delimiters occurs earliest.

    Returns a 3-tuple: the text before the delimiter, the text after it, and
    the delimiter that matched. When none of the delimiters occurs, the
    whole input is returned as the first part, with ``''`` and ``None`` as
    the remaining items.

    Example: ::

        >>> split_first('foo/bar?baz', '?/=')
        ('foo', 'bar?baz', '/')
        >>> split_first('foo/bar?baz', '123')
        ('foo/bar?baz', '', None)

    Scales linearly with number of delims. Not ideal for large number of delims.
    """
    best_pos = None
    best_delim = None

    for candidate in delims:
        pos = s.find(candidate)
        # Keep only delimiters that are present and strictly earlier than
        # the best one so far, so ties go to the delimiter listed first.
        if pos >= 0 and (best_pos is None or pos < best_pos):
            best_pos, best_delim = pos, candidate

    if best_pos is None:
        return s, '', None

    before = s[:best_pos]
    after = s[best_pos + 1:]
    return before, after, best_delim
329 | |
330 | |
def parse_url(url):
    """
    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
    performed to parse incomplete urls. Fields not provided will be None.

    Partly backwards-compatible with :mod:`urlparse`.

    :param url: The URL string to parse.
    :return: a :class:`.Url` with the components that could be identified.
    :raises LocationParseError: if a port is present but is not made up
        solely of digits.

    Example: ::

        >>> parse_url('http://google.com/mail/')
        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
        >>> parse_url('google.com:80')
        Url(scheme=None, host='google.com', port=80, path=None, ...)
        >>> parse_url('/foo?bar')
        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
    """

    # While this code has overlap with stdlib's urlparse, it is much
    # simplified for our needs and less annoying.
    # Additionally, this implementations does silly things to be optimal
    # on CPython.

    # ``url`` is consumed piece by piece below; keep the caller's input so
    # that error messages can show what was actually passed in.
    original_url = url

    scheme = None
    auth = None
    host = None
    port = None
    path = None
    fragment = None
    query = None

    # Scheme
    if '://' in url:
        scheme, url = url.split('://', 1)

    # Find the earliest Authority Terminator
    # (http://tools.ietf.org/html/rfc3986#section-3.2)
    url, path_, delim = split_first(url, ['/', '?', '#'])

    if delim:
        # Reassemble the path
        path = delim + path_

    # Auth
    if '@' in url:
        # The last '@' ends the auth part, so a password that itself
        # contains '@' is not mistaken for the start of the host.
        auth, url = url.rsplit('@', 1)

    # IPv6
    if url and url[0] == '[':
        host, url = url.split(']', 1)
        host += ']'

    # Port
    if ':' in url:
        _host, port = url.split(':', 1)

        if not host:
            host = _host

        if not port.isdigit():
            raise LocationParseError("Failed to parse: %s" % original_url)

        port = int(port)

    elif not host and url:
        host = url

    if not path:
        return Url(scheme, auth, host, port, path, query, fragment)

    # Fragment
    if '#' in path:
        path, fragment = path.split('#', 1)

    # Query
    if '?' in path:
        path, query = path.split('?', 1)

    return Url(scheme, auth, host, port, path, query, fragment)
409 | |
410 | |
def get_host(url):
    """
    Deprecated. Use :func:`.parse_url` instead.

    Returns a ``(scheme, hostname, port)`` tuple for ``url``; the scheme
    falls back to ``'http'`` when the URL does not carry one.
    """
    parsed = parse_url(url)
    scheme = parsed.scheme or 'http'
    return scheme, parsed.hostname, parsed.port
417 | |
418 | |
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
                 basic_auth=None):
    """
    Build a dict of commonly used request headers.

    :param keep_alive:
        If truthy, adds a 'connection: keep-alive' header.

    :param accept_encoding:
        Can be a boolean, list, or string.
        A string is used as provided.
        A list is joined with commas.
        Any other truthy value (e.g. ``True``) becomes 'gzip,deflate'.

    :param user_agent:
        String representing the user-agent you want, such as
        "python-urllib3/0.6"

    :param basic_auth:
        Colon-separated username:password string for 'authorization: basic ...'
        auth header.

    :return: a dict mapping lower-case header names to their values.

    Example: ::

        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
        >>> make_headers(accept_encoding=True)
        {'accept-encoding': 'gzip,deflate'}
    """
    headers = {}

    if accept_encoding:
        if isinstance(accept_encoding, list):
            accept_encoding = ','.join(accept_encoding)
        elif not isinstance(accept_encoding, str):
            # Neither a string nor a list: fall back to the default codings.
            accept_encoding = 'gzip,deflate'
        headers['accept-encoding'] = accept_encoding

    if user_agent:
        headers['user-agent'] = user_agent

    if keep_alive:
        headers['connection'] = 'keep-alive'

    if basic_auth:
        credentials = b64encode(six.b(basic_auth)).decode('utf-8')
        headers['authorization'] = 'Basic ' + credentials

    return headers
469 | |
470 | |
def is_connection_dropped(conn):  # Platform-specific
    """
    Returns True if the connection is dropped and should be closed.

    The socket is probed for readability without blocking: a readable idle
    socket means either unexpected buffered data or a closed peer.

    :param conn:
        :class:`httplib.HTTPConnection` object.

    :return: ``True`` or ``False``; always a real boolean.

    Note: For platforms like AppEngine, this will always return ``False`` to
    let the platform handle connection recycling transparently for us.
    """
    sock = getattr(conn, 'sock', False)
    if not sock:  # Platform-specific: AppEngine
        return False

    if not poll:
        if not select:  # Platform-specific: AppEngine
            return False

        try:
            # select() returns the list of readable sockets; collapse it to
            # a boolean so both code paths return the same type.
            return bool(select([sock], [], [], 0.0)[0])
        except SocketError:
            return True

    # This version is better on platforms that support it.
    p = poll()
    p.register(sock, POLLIN)
    for (fno, ev) in p.poll(0.0):
        if fno == sock.fileno():
            # Either data is buffered (bad), or the connection is dropped.
            return True

    # No event for our socket: report an explicit False instead of falling
    # off the end and returning None.
    return False
501 | |
502 | |
def resolve_cert_reqs(candidate):
    """
    Resolve the argument to a numeric constant that can be passed to the
    wrap_socket function/method from the :mod:`ssl` module.

    ``None`` resolves to :data:`ssl.CERT_NONE`.
    A string is taken to be the name of a constant in the :mod:`ssl` module,
    or that name without its ``CERT_`` prefix (so `REQUIRED` may be given
    instead of `CERT_REQUIRED`).
    Anything else is assumed to already be the numeric constant and is
    returned unchanged.
    """
    if candidate is None:
        return CERT_NONE

    if not isinstance(candidate, str):
        return candidate

    # Try the name as given first, then the abbreviated form.
    resolved = getattr(ssl, candidate, None)
    if resolved is None:
        resolved = getattr(ssl, 'CERT_' + candidate)
    return resolved
524 | |
525 | |
def resolve_ssl_version(candidate):
    """
    Resolve the argument to an :mod:`ssl` protocol constant.

    ``None`` resolves to :data:`ssl.PROTOCOL_SSLv23`. A string is looked up
    in the :mod:`ssl` module, first as given and then with a ``PROTOCOL_``
    prefix. Anything else is returned unchanged.
    """
    if candidate is None:
        return PROTOCOL_SSLv23

    if not isinstance(candidate, str):
        return candidate

    # Try the name as given first, then the abbreviated form.
    resolved = getattr(ssl, candidate, None)
    if resolved is None:
        resolved = getattr(ssl, 'PROTOCOL_' + candidate)
    return resolved
540 | |
541 | |
def assert_fingerprint(cert, fingerprint):
    """
    Verify that ``fingerprint`` matches the digest of the given certificate.

    The hash function (MD5 or SHA-1) is chosen from the length of the
    fingerprint. Returns ``None`` when the fingerprint matches.

    :param cert:
        Certificate as bytes object.
    :param fingerprint:
        Fingerprint as string of hexdigits, can be interspersed by colons.
    :raises SSLError:
        If the fingerprint has an unsupported length or does not match the
        certificate's digest.
    """

    # Maps the length of a digest to a possible hash function producing
    # this digest.
    hashfunc_map = {16: md5, 20: sha1}

    normalized = fingerprint.replace(':', '').lower()

    # Two hex digits per digest byte; a remainder means an odd digit count.
    digest_length, leftover = divmod(len(normalized), 2)
    if leftover or digest_length not in hashfunc_map:
        raise SSLError('Fingerprint is of invalid length.')

    # We need encode() here for py32; works on py2 and p33.
    expected_digest = unhexlify(normalized.encode())

    actual_digest = hashfunc_map[digest_length](cert).digest()

    if actual_digest != expected_digest:
        raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
                       .format(hexlify(expected_digest),
                               hexlify(actual_digest)))
577 | |
def is_fp_closed(obj):
    """
    Report whether a file-like object is closed.

    :param obj:
        The file-like object to check.
    :return:
        For objects with an ``fp`` attribute, whether that attribute is
        ``None``; otherwise the object's own ``closed`` attribute.
    """
    # An ``fp`` attribute marks a container for another file-like object
    # that gets released on exhaustion (e.g. HTTPResponse).
    wraps_other_fp = hasattr(obj, 'fp')
    return obj.fp is None if wraps_other_fp else obj.closed
591 | |
592 | |
if SSLContext is not None:  # Python 3.2+
    def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
                        ca_certs=None, server_hostname=None,
                        ssl_version=None):
        """
        Wrap ``sock`` using a freshly created :class:`ssl.SSLContext`.

        All arguments except `server_hostname` have the same meaning as for
        :func:`ssl.wrap_socket`

        :param server_hostname:
            Hostname of the expected certificate
        :raises SSLError:
            If loading ``ca_certs`` fails for any reason.
        """
        ctx = SSLContext(ssl_version)
        ctx.verify_mode = cert_reqs

        if ca_certs:
            try:
                ctx.load_verify_locations(ca_certs)
            # Py32 raises IOError
            # Py33 raises FileNotFoundError
            except Exception as e:  # Reraise as SSLError
                raise SSLError(e)

        if certfile:
            # FIXME: This block needs a test.
            ctx.load_cert_chain(certfile, keyfile)

        if not HAS_SNI:
            return ctx.wrap_socket(sock)

        # Platform-specific: OpenSSL with enabled SNI
        return ctx.wrap_socket(sock, server_hostname=server_hostname)

else:  # Python 3.1 and earlier
    def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
                        ca_certs=None, server_hostname=None,
                        ssl_version=None):
        """
        Wrap ``sock`` by delegating to the module-level ``wrap_socket``.

        ``server_hostname`` is accepted for signature compatibility but is
        not passed on.
        """
        options = dict(keyfile=keyfile, certfile=certfile,
                       ca_certs=ca_certs, cert_reqs=cert_reqs,
                       ssl_version=ssl_version)
        return wrap_socket(sock, **options)
OLD | NEW |