OLD | NEW |
1 # urllib3/util.py | 1 # urllib3/util.py |
2 # Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) | 2 # Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) |
3 # | 3 # |
4 # This module is part of urllib3 and is released under | 4 # This module is part of urllib3 and is released under |
5 # the MIT License: http://www.opensource.org/licenses/mit-license.php | 5 # the MIT License: http://www.opensource.org/licenses/mit-license.php |
6 | 6 |
7 | 7 |
8 from base64 import b64encode | 8 from base64 import b64encode |
| 9 from binascii import hexlify, unhexlify |
9 from collections import namedtuple | 10 from collections import namedtuple |
10 from socket import error as SocketError | |
11 from hashlib import md5, sha1 | 11 from hashlib import md5, sha1 |
12 from binascii import hexlify, unhexlify | 12 from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT |
| 13 import time |
13 | 14 |
14 try: | 15 try: |
15 from select import poll, POLLIN | 16 from select import poll, POLLIN |
16 except ImportError: # `poll` doesn't exist on OSX and other platforms | 17 except ImportError: # `poll` doesn't exist on OSX and other platforms |
17 poll = False | 18 poll = False |
18 try: | 19 try: |
19 from select import select | 20 from select import select |
20 except ImportError: # `select` doesn't exist on AppEngine. | 21 except ImportError: # `select` doesn't exist on AppEngine. |
21 select = False | 22 select = False |
22 | 23 |
23 try: # Test for SSL features | 24 try: # Test for SSL features |
24 SSLContext = None | 25 SSLContext = None |
25 HAS_SNI = False | 26 HAS_SNI = False |
26 | 27 |
27 import ssl | 28 import ssl |
28 from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 | 29 from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 |
29 from ssl import SSLContext # Modern SSL? | 30 from ssl import SSLContext # Modern SSL? |
30 from ssl import HAS_SNI # Has SNI? | 31 from ssl import HAS_SNI # Has SNI? |
31 except ImportError: | 32 except ImportError: |
32 pass | 33 pass |
33 | 34 |
34 | |
35 from .packages import six | 35 from .packages import six |
36 from .exceptions import LocationParseError, SSLError | 36 from .exceptions import LocationParseError, SSLError, TimeoutStateError |
| 37 |
| 38 |
| 39 _Default = object() |
| 40 # _Default marks an omitted timeout argument. Timeout resolves it to socket's |
| 41 # _GLOBAL_DEFAULT_TIMEOUT, the value httplib uses to mean the default timeout. |
| 42 |
| 43 |
| 44 def current_time(): |
| 45 """ |
| 46 Retrieve the current time. This function is mocked out in unit testing. |
| 47 """ |
| 48 return time.time() |
| 49 |
| 50 |
| 51 class Timeout(object): |
| 52 """ |
| 53 Utility object for storing timeout values. |
| 54 |
| 55 Example usage: |
| 56 |
| 57 .. code-block:: python |
| 58 |
| 59 timeout = urllib3.util.Timeout(connect=2.0, read=7.0) |
| 60 pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) |
| 61 pool.request(...) # Etc, etc |
| 62 |
| 63 :param connect: |
| 64 The maximum amount of time to wait for a connection attempt to a server |
| 65 to succeed. Omitting the parameter will default the connect timeout to |
| 66 the system default, probably `the global default timeout in socket.py |
| 67 <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_. |
| 68 None will set an infinite timeout for connection attempts. |
| 69 |
| 70 :type connect: integer, float, or None |
| 71 |
| 72 :param read: |
| 73 The maximum amount of time to wait between consecutive |
| 74 read operations for a response from the server. Omitting |
| 75 the parameter will default the read timeout to the system |
| 76 default, probably `the global default timeout in socket.py |
| 77 <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_. |
| 78 None will set an infinite timeout. |
| 79 |
| 80 :type read: integer, float, or None |
| 81 |
| 82 :param total: |
| 83 The maximum amount of time to wait for an HTTP request to connect and |
| 84 return. This combines the connect and read timeouts into one. In the |
| 85 event that both a connect timeout and a total are specified, or a read |
| 86 timeout and a total are specified, the shorter timeout will be applied. |
| 87 |
| 88 Defaults to None. |
| 89 |
| 90 |
| 91 :type total: integer, float, or None |
| 92 |
| 93 .. note:: |
| 94 |
| 95 Many factors can affect the total amount of time for urllib3 to return |
| 96 an HTTP response. Specifically, Python's DNS resolver does not obey the |
| 97 timeout specified on the socket. Other factors that can affect total |
| 98 request time include high CPU load, high swap, the program running at a |
| 99 low priority level, or other behaviors. The observed running time for |
| 100 urllib3 to return a response may be greater than the value passed to |
| 101 `total`. |
| 102 |
| 103 In addition, the read and total timeouts only measure the time between |
| 104 read operations on the socket connecting the client and the server, not |
| 105 the total amount of time for the request to return a complete response. |
| 106 As an example, you may want a request to return within 7 seconds or |
| 107 fail, so you set the ``total`` timeout to 7 seconds. If the server |
| 108 sends one byte to you every 5 seconds, the request will **not** trigger a |
| 109 timeout. This case is admittedly rare. |
| 110 """ |
| 111 |
| 112 #: A sentinel object representing the default timeout value |
| 113 DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT |
| 114 |
| 115 def __init__(self, connect=_Default, read=_Default, total=None): |
| 116 self._connect = self._validate_timeout(connect, 'connect') |
| 117 self._read = self._validate_timeout(read, 'read') |
| 118 self.total = self._validate_timeout(total, 'total') |
| 119 self._start_connect = None |
| 120 |
| 121 def __str__(self): |
| 122 return '%s(connect=%r, read=%r, total=%r)' % ( |
| 123 type(self).__name__, self._connect, self._read, self.total) |
| 124 |
| 125 |
| 126 @classmethod |
| 127 def _validate_timeout(cls, value, name): |
| 128 """ Check that a timeout attribute is valid |
| 129 |
| 130 :param value: The timeout value to validate |
| 131 :param name: The name of the timeout attribute to validate. This is used |
| 132 for clear error messages |
| 133 :return: the value |
| 134 :raises ValueError: if the type is not an integer or a float, or if it |
| 135 is a numeric value less than zero |
| 136 """ |
| 137 if value is _Default: |
| 138 return cls.DEFAULT_TIMEOUT |
| 139 |
| 140 if value is None or value is cls.DEFAULT_TIMEOUT: |
| 141 return value |
| 142 |
| 143 try: |
| 144 float(value) |
| 145 except (TypeError, ValueError): |
| 146 raise ValueError("Timeout value %s was %s, but it must be an " |
| 147 "int or float." % (name, value)) |
| 148 |
| 149 try: |
| 150 if value < 0: |
| 151 raise ValueError("Attempted to set %s timeout to %s, but the " |
| 152 "timeout cannot be set to a value less " |
| 153 "than 0." % (name, value)) |
| 154 except TypeError: # Python 3 |
| 155 raise ValueError("Timeout value %s was %s, but it must be an " |
| 156 "int or float." % (name, value)) |
| 157 |
| 158 return value |
| 159 |
| 160 @classmethod |
| 161 def from_float(cls, timeout): |
| 162 """ Create a new Timeout from a legacy timeout value. |
| 163 |
| 164 The timeout value used by httplib.py sets the same timeout on the |
| 165 connect() and recv() socket requests. This creates a :class:`Timeout` |
| 166 object that sets the individual timeouts to the ``timeout`` value passed |
| 167 to this function. |
| 168 |
| 169 :param timeout: The legacy timeout value |
| 170 :type timeout: integer, float, sentinel default object, or None |
| 171 :return: a Timeout object |
| 172 :rtype: :class:`Timeout` |
| 173 """ |
| 174 return Timeout(read=timeout, connect=timeout) |
| 175 |
| 176 def clone(self): |
| 177 """ Create a copy of the timeout object |
| 178 |
| 179 Timeout properties are stored per-pool but each request needs a fresh |
| 180 Timeout object to ensure each one has its own start/stop configured. |
| 181 |
| 182 :return: a copy of the timeout object |
| 183 :rtype: :class:`Timeout` |
| 184 """ |
| 185 # We can't use copy.deepcopy because that will also create a new object |
| 186 # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to |
| 187 # detect the user default. |
| 188 return Timeout(connect=self._connect, read=self._read, |
| 189 total=self.total) |
| 190 |
| 191 def start_connect(self): |
| 192 """ Start the timeout clock, used during a connect() attempt |
| 193 |
| 194 :raises urllib3.exceptions.TimeoutStateError: if you attempt |
| 195 to start a timer that has been started already. |
| 196 """ |
| 197 if self._start_connect is not None: |
| 198 raise TimeoutStateError("Timeout timer has already been started.") |
| 199 self._start_connect = current_time() |
| 200 return self._start_connect |
| 201 |
| 202 def get_connect_duration(self): |
| 203 """ Gets the time elapsed since the call to :meth:`start_connect`. |
| 204 |
| 205 :return: the elapsed time |
| 206 :rtype: float |
| 207 :raises urllib3.exceptions.TimeoutStateError: if you attempt |
| 208 to get duration for a timer that hasn't been started. |
| 209 """ |
| 210 if self._start_connect is None: |
| 211 raise TimeoutStateError("Can't get connect duration for timer " |
| 212 "that has not started.") |
| 213 return current_time() - self._start_connect |
| 214 |
| 215 @property |
| 216 def connect_timeout(self): |
| 217 """ Get the value to use when setting a connection timeout. |
| 218 |
| 219 This will be a positive float or integer, the value None |
| 220 (never timeout), or the default system timeout. |
| 221 |
| 222 :return: the connect timeout |
| 223 :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None |
| 224 """ |
| 225 if self.total is None: |
| 226 return self._connect |
| 227 |
| 228 if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: |
| 229 return self.total |
| 230 |
| 231 return min(self._connect, self.total) |
| 232 |
| 233 @property |
| 234 def read_timeout(self): |
| 235 """ Get the value for the read timeout. |
| 236 |
| 237 This assumes some time has elapsed in the connection timeout and |
| 238 computes the read timeout appropriately. |
| 239 |
| 240 If self.total is set, the read timeout is dependent on the amount of |
| 241 time taken by the connect timeout. If total is set without an explicit |
| 242 read timeout and the connect timer has not been started, a |
| 243 :exc:`~urllib3.exceptions.TimeoutStateError` will be raised. |
| 244 |
| 245 :return: the value to use for the read timeout |
| 246 :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None |
| 247 :raises urllib3.exceptions.TimeoutStateError: If total is set, no explicit |
| 248 read timeout is set, and :meth:`start_connect` has not been called. |
| 249 """ |
| 250 if (self.total is not None and |
| 251 self.total is not self.DEFAULT_TIMEOUT and |
| 252 self._read is not None and |
| 253 self._read is not self.DEFAULT_TIMEOUT): |
| 254 # in case the connect timeout has not yet been established. |
| 255 if self._start_connect is None: |
| 256 return self._read |
| 257 return max(0, min(self.total - self.get_connect_duration(), |
| 258 self._read)) |
| 259 elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: |
| 260 return max(0, self.total - self.get_connect_duration()) |
| 261 else: |
| 262 return self._read |
37 | 263 |
38 | 264 |
39 class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query',
'fragment'])): | 265 class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query',
'fragment'])): |
40 """ | 266 """ |
41 Datastructure for representing an HTTP URL. Used as a return value for | 267 Datastructure for representing an HTTP URL. Used as a return value for |
42 :func:`parse_url`. | 268 :func:`parse_url`. |
43 """ | 269 """ |
44 slots = () | 270 slots = () |
45 | 271 |
46 def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, qu
ery=None, fragment=None): | 272 def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, qu
ery=None, fragment=None): |
47 return super(Url, cls).__new__(cls, scheme, auth, host, port, path, quer
y, fragment) | 273 return super(Url, cls).__new__(cls, scheme, auth, host, port, path, quer
y, fragment) |
48 | 274 |
49 @property | 275 @property |
50 def hostname(self): | 276 def hostname(self): |
51 """For backwards-compatibility with urlparse. We're nice like that.""" | 277 """For backwards-compatibility with urlparse. We're nice like that.""" |
52 return self.host | 278 return self.host |
53 | 279 |
54 @property | 280 @property |
55 def request_uri(self): | 281 def request_uri(self): |
56 """Absolute path including the query string.""" | 282 """Absolute path including the query string.""" |
57 uri = self.path or '/' | 283 uri = self.path or '/' |
58 | 284 |
59 if self.query is not None: | 285 if self.query is not None: |
60 uri += '?' + self.query | 286 uri += '?' + self.query |
61 | 287 |
62 return uri | 288 return uri |
63 | 289 |
| 290 @property |
| 291 def netloc(self): |
| 292 """Network location including host and port""" |
| 293 if self.port: |
| 294 return '%s:%d' % (self.host, self.port) |
| 295 return self.host |
| 296 |
64 | 297 |
65 def split_first(s, delims): | 298 def split_first(s, delims): |
66 """ | 299 """ |
67 Given a string and an iterable of delimiters, split on the first found | 300 Given a string and an iterable of delimiters, split on the first found |
68 delimiter. Return two split parts and the matched delimiter. | 301 delimiter. Return two split parts and the matched delimiter. |
69 | 302 |
70 If not found, then the first part is the full input string. | 303 If not found, then the first part is the full input string. |
71 | 304 |
72 Example: :: | 305 Example: :: |
73 | 306 |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
107 >>> parse_url('http://google.com/mail/') | 340 >>> parse_url('http://google.com/mail/') |
108 Url(scheme='http', host='google.com', port=None, path='/', ...) | 341 Url(scheme='http', host='google.com', port=None, path='/', ...) |
109 >>> parse_url('google.com:80') | 342 >>> parse_url('google.com:80') |
110 Url(scheme=None, host='google.com', port=80, path=None, ...) | 343 Url(scheme=None, host='google.com', port=80, path=None, ...) |
111 >>> parse_url('/foo?bar') | 344 >>> parse_url('/foo?bar') |
112 Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) | 345 Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) |
113 """ | 346 """ |
114 | 347 |
115 # While this code has overlap with stdlib's urlparse, it is much | 348 # While this code has overlap with stdlib's urlparse, it is much |
116 # simplified for our needs and less annoying. | 349 # simplified for our needs and less annoying. |
117 # Additionally, this imeplementations does silly things to be optimal | 350 # Additionally, this implementation does silly things to be optimal |
118 # on CPython. | 351 # on CPython. |
119 | 352 |
120 scheme = None | 353 scheme = None |
121 auth = None | 354 auth = None |
122 host = None | 355 host = None |
123 port = None | 356 port = None |
124 path = None | 357 path = None |
125 fragment = None | 358 fragment = None |
126 query = None | 359 query = None |
127 | 360 |
128 # Scheme | 361 # Scheme |
129 if '://' in url: | 362 if '://' in url: |
130 scheme, url = url.split('://', 1) | 363 scheme, url = url.split('://', 1) |
131 | 364 |
132 # Find the earliest Authority Terminator | 365 # Find the earliest Authority Terminator |
133 # (http://tools.ietf.org/html/rfc3986#section-3.2) | 366 # (http://tools.ietf.org/html/rfc3986#section-3.2) |
134 url, path_, delim = split_first(url, ['/', '?', '#']) | 367 url, path_, delim = split_first(url, ['/', '?', '#']) |
135 | 368 |
136 if delim: | 369 if delim: |
137 # Reassemble the path | 370 # Reassemble the path |
138 path = delim + path_ | 371 path = delim + path_ |
139 | 372 |
140 # Auth | 373 # Auth |
141 if '@' in url: | 374 if '@' in url: |
142 auth, url = url.split('@', 1) | 375 auth, url = url.split('@', 1) |
143 | 376 |
144 # IPv6 | 377 # IPv6 |
145 if url and url[0] == '[': | 378 if url and url[0] == '[': |
146 host, url = url[1:].split(']', 1) | 379 host, url = url.split(']', 1) |
| 380 host += ']' |
147 | 381 |
148 # Port | 382 # Port |
149 if ':' in url: | 383 if ':' in url: |
150 _host, port = url.split(':', 1) | 384 _host, port = url.split(':', 1) |
151 | 385 |
152 if not host: | 386 if not host: |
153 host = _host | 387 host = _host |
154 | 388 |
155 if not port.isdigit(): | 389 if not port.isdigit(): |
156 raise LocationParseError("Failed to parse: %s" % url) | 390 raise LocationParseError("Failed to parse: %s" % url) |
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
334 | 568 |
335 hashfunc = hashfunc_map[digest_length] | 569 hashfunc = hashfunc_map[digest_length] |
336 | 570 |
337 cert_digest = hashfunc(cert).digest() | 571 cert_digest = hashfunc(cert).digest() |
338 | 572 |
339 if not cert_digest == fingerprint_bytes: | 573 if not cert_digest == fingerprint_bytes: |
340 raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' | 574 raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' |
341 .format(hexlify(fingerprint_bytes), | 575 .format(hexlify(fingerprint_bytes), |
342 hexlify(cert_digest))) | 576 hexlify(cert_digest))) |
343 | 577 |
| 578 def is_fp_closed(obj): |
| 579 """ |
| 580 Checks whether a given file-like object is closed. |
| 581 |
| 582 :param obj: |
| 583 The file-like object to check. |
| 584 """ |
| 585 if hasattr(obj, 'fp'): |
| 586 # Object is a container for another file-like object that gets released |
| 587 # on exhaustion (e.g. HTTPResponse) |
| 588 return obj.fp is None |
| 589 |
| 590 return obj.closed |
| 591 |
344 | 592 |
345 if SSLContext is not None: # Python 3.2+ | 593 if SSLContext is not None: # Python 3.2+ |
346 def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, | 594 def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, |
347 ca_certs=None, server_hostname=None, | 595 ca_certs=None, server_hostname=None, |
348 ssl_version=None): | 596 ssl_version=None): |
349 """ | 597 """ |
350 All arguments except `server_hostname` have the same meaning as for | 598 All arguments except `server_hostname` have the same meaning as for |
351 :func:`ssl.wrap_socket` | 599 :func:`ssl.wrap_socket` |
352 | 600 |
353 :param server_hostname: | 601 :param server_hostname: |
(...skipping 15 matching lines...) Expand all Loading... |
369 return context.wrap_socket(sock, server_hostname=server_hostname) | 617 return context.wrap_socket(sock, server_hostname=server_hostname) |
370 return context.wrap_socket(sock) | 618 return context.wrap_socket(sock) |
371 | 619 |
372 else: # Python 3.1 and earlier | 620 else: # Python 3.1 and earlier |
373 def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, | 621 def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, |
374 ca_certs=None, server_hostname=None, | 622 ca_certs=None, server_hostname=None, |
375 ssl_version=None): | 623 ssl_version=None): |
376 return wrap_socket(sock, keyfile=keyfile, certfile=certfile, | 624 return wrap_socket(sock, keyfile=keyfile, certfile=certfile, |
377 ca_certs=ca_certs, cert_reqs=cert_reqs, | 625 ca_certs=ca_certs, cert_reqs=cert_reqs, |
378 ssl_version=ssl_version) | 626 ssl_version=ssl_version) |
OLD | NEW |