OLD | NEW |
(Empty) | |
| 1 # urllib3/connectionpool.py |
| 2 # Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) |
| 3 # |
| 4 # This module is part of urllib3 and is released under |
| 5 # the MIT License: http://www.opensource.org/licenses/mit-license.php |
| 6 |
| 7 import logging |
| 8 import socket |
| 9 import errno |
| 10 |
| 11 from socket import error as SocketError, timeout as SocketTimeout |
| 12 from .util import resolve_cert_reqs, resolve_ssl_version, assert_fingerprint |
| 13 |
| 14 try: # Python 3 |
| 15 from http.client import HTTPConnection, HTTPException |
| 16 from http.client import HTTP_PORT, HTTPS_PORT |
| 17 except ImportError: |
| 18 from httplib import HTTPConnection, HTTPException |
| 19 from httplib import HTTP_PORT, HTTPS_PORT |
| 20 |
| 21 try: # Python 3 |
| 22 from queue import LifoQueue, Empty, Full |
| 23 except ImportError: |
| 24 from Queue import LifoQueue, Empty, Full |
| 25 |
| 26 |
# Probe for SSL support. The fallbacks below are assigned first so that every
# name is still defined when either import fails part-way through.
try:  # Compiled with SSL?
    HTTPSConnection = object

    class BaseSSLError(BaseException):
        """
        Placeholder used when the :mod:`ssl` module is unavailable.

        ``except BaseSSLError`` must name an exception class: on Python 3,
        matching a raised exception against ``None`` raises ``TypeError``.
        Nothing ever raises this placeholder, so the clause simply never
        matches on platforms without SSL.
        """

    ssl = None

    try:  # Python 3
        from http.client import HTTPSConnection
    except ImportError:
        from httplib import HTTPSConnection

    import ssl
    BaseSSLError = ssl.SSLError

except (ImportError, AttributeError):  # Platform-specific: No SSL.
    pass
| 42 |
| 43 |
| 44 from .request import RequestMethods |
| 45 from .response import HTTPResponse |
| 46 from .util import get_host, is_connection_dropped, ssl_wrap_socket |
| 47 from .exceptions import ( |
| 48 ClosedPoolError, |
| 49 EmptyPoolError, |
| 50 HostChangedError, |
| 51 MaxRetryError, |
| 52 SSLError, |
| 53 TimeoutError, |
| 54 ) |
| 55 |
| 56 from .packages.ssl_match_hostname import match_hostname, CertificateError |
| 57 from .packages import six |
| 58 |
| 59 |
# Lazy integer range that works on both Python 2 and Python 3.
xrange = six.moves.xrange

# Logger shared by everything in this module.
log = logging.getLogger(__name__)

# Sentinel meaning "argument not supplied", so that ``None`` stays usable as
# an explicit value (e.g. ``timeout=None`` disables the timeout).
_Default = object()

# Default port for each URL scheme the pools understand.
port_by_scheme = {'http': HTTP_PORT, 'https': HTTPS_PORT}
| 70 |
| 71 |
| 72 ## Connection objects (extension of httplib) |
| 73 |
class VerifiedHTTPSConnection(HTTPSConnection):
    """
    Based on httplib.HTTPSConnection but wraps the socket with
    SSL certification.

    Verification settings are supplied through :meth:`set_cert` (and the
    ``ssl_version`` attribute) before :meth:`connect` is called.
    """
    cert_reqs = None
    ca_certs = None
    ssl_version = None
    # Class-level defaults so that connect() does not raise AttributeError
    # when set_cert() was never called on this connection.
    assert_hostname = None
    assert_fingerprint = None

    def set_cert(self, key_file=None, cert_file=None,
                 cert_reqs=None, ca_certs=None,
                 assert_hostname=None, assert_fingerprint=None):
        """
        Store the certificate and verification settings used by
        :meth:`connect`.

        :param key_file: Passed to ``ssl_wrap_socket`` as the key file.
        :param cert_file: Passed to ``ssl_wrap_socket`` as the cert file.
        :param cert_reqs: Resolved with ``resolve_cert_reqs`` on connect.
        :param ca_certs: Passed to ``ssl_wrap_socket`` as ``ca_certs``.
        :param assert_hostname:
            Hostname to match the peer certificate against instead of
            ``self.host``.
        :param assert_fingerprint:
            If set, the peer certificate's fingerprint is checked instead of
            its hostname.
        """
        self.key_file = key_file
        self.cert_file = cert_file
        self.cert_reqs = cert_reqs
        self.ca_certs = ca_certs
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint

    def connect(self):
        """
        Open the TCP connection, wrap it in SSL and verify the peer.

        Verification is skipped when the resolved ``cert_reqs`` is
        ``ssl.CERT_NONE``. Otherwise the fingerprint is checked when
        ``assert_fingerprint`` is set, else the certificate hostname is
        matched against ``assert_hostname`` or, failing that, ``self.host``.
        """
        # Add certificate verification
        sock = socket.create_connection((self.host, self.port), self.timeout)

        resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
        resolved_ssl_version = resolve_ssl_version(self.ssl_version)

        # Wrap socket using verification with the root certs in
        # trusted_root_certs
        self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
                                    cert_reqs=resolved_cert_reqs,
                                    ca_certs=self.ca_certs,
                                    server_hostname=self.host,
                                    ssl_version=resolved_ssl_version)

        if resolved_cert_reqs != ssl.CERT_NONE:
            if self.assert_fingerprint:
                # Here ``assert_fingerprint`` is the module-level helper; the
                # attribute of the same name holds the expected digest.
                assert_fingerprint(self.sock.getpeercert(binary_form=True),
                                   self.assert_fingerprint)
            else:
                match_hostname(self.sock.getpeercert(),
                               self.assert_hostname or self.host)
| 116 |
| 117 ## Pool objects |
| 118 |
class ConnectionPool(object):
    """
    Common base for the connection pools in this module, i.e.
    :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.

    It only records the target ``host`` and ``port``; subclasses set
    ``scheme`` and may override ``QueueCls``.
    """

    scheme = None
    QueueCls = LifoQueue

    def __init__(self, host, port=None):
        self.host, self.port = host, port

    def __str__(self):
        # Use the runtime class name so subclasses describe themselves.
        cls_name = type(self).__name__
        return '%s(host=%r, port=%r)' % (cls_name, self.host, self.port)
| 135 |
| 136 |
class HTTPConnectionPool(ConnectionPool, RequestMethods):
    """
    Thread-safe connection pool for one host.

    :param host:
        Host used for this HTTP Connection (e.g. "localhost"), passed into
        :class:`httplib.HTTPConnection`.

    :param port:
        Port used for this HTTP Connection (None is equivalent to 80), passed
        into :class:`httplib.HTTPConnection`.

    :param strict:
        Causes BadStatusLine to be raised if the status line can't be parsed
        as a valid HTTP/1.0 or 1.1 status line, passed into
        :class:`httplib.HTTPConnection`. Only forwarded on Python 2; the
        Python 3 ``http.client`` no longer supports it.

    :param timeout:
        Socket timeout for each individual connection, can be a float. None
        disables timeout.

    :param maxsize:
        Number of connections to save that can be reused. More than 1 is useful
        in multithreaded situations. If ``block`` is set to false, more
        connections will be created but they will not be saved once they've
        been used.

    :param block:
        If set to True, no more than ``maxsize`` connections will be used at
        a time. When no free connections are available, the call will block
        until a connection has been released. This is a useful side effect for
        particular multithreaded situations where one does not want to use more
        than maxsize connections per host to prevent flooding.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.
    """

    scheme = 'http'

    def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1,
                 block=False, headers=None):
        ConnectionPool.__init__(self, host, port)
        RequestMethods.__init__(self, headers)

        self.strict = strict
        self.timeout = timeout
        self.pool = self.QueueCls(maxsize)
        self.block = block

        # Fill the queue up so that doing get() on it will block properly
        for _ in xrange(maxsize):
            self.pool.put(None)

        # These are mostly for testing and debugging purposes.
        self.num_connections = 0
        self.num_requests = 0

    def _new_conn(self):
        """
        Return a fresh :class:`httplib.HTTPConnection`.
        """
        self.num_connections += 1
        log.info("Starting new HTTP connection (%d): %s",
                 self.num_connections, self.host)

        # ``strict`` was removed from http.client.HTTPConnection in
        # Python 3.4 (and was already ignored before that), so only Python 2
        # receives it.
        extra_params = {}
        if not six.PY3:  # Python 2
            extra_params['strict'] = self.strict

        return HTTPConnection(host=self.host,
                              port=self.port,
                              **extra_params)

    def _get_conn(self, timeout=None):
        """
        Get a connection. Will return a pooled connection if one is available.

        If no connections are available and :prop:`.block` is ``False``, then a
        fresh connection is returned.

        :param timeout:
            Seconds to wait before giving up and raising
            :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
            :prop:`.block` is ``True``.

        :raises ClosedPoolError: if the pool has been closed.
        :raises EmptyPoolError: if blocking and no connection became free.
        """
        conn = None
        try:
            conn = self.pool.get(block=self.block, timeout=timeout)

        except AttributeError:  # self.pool is None
            raise ClosedPoolError(self, "Pool is closed.")

        except Empty:
            if self.block:
                raise EmptyPoolError(self,
                                     "Pool reached maximum size and no more "
                                     "connections are allowed.")
            # Oh well, we'll create a new connection then

        # If this is a persistent connection, check if it got disconnected
        if conn and is_connection_dropped(conn):
            log.info("Resetting dropped connection: %s", self.host)
            conn.close()

        return conn or self._new_conn()

    def _put_conn(self, conn):
        """
        Put a connection back into the pool.

        :param conn:
            Connection object for the current host and port as returned by
            :meth:`._new_conn` or :meth:`._get_conn`. May be ``None``, which
            acts as a placeholder for "create a fresh connection later".

        If the pool is already full, the connection is closed and discarded
        because we exceeded maxsize. If connections are discarded frequently,
        then maxsize should be increased.

        If the pool is closed, then the connection will be closed and discarded.
        """
        try:
            self.pool.put(conn, block=False)
            return  # Everything is dandy, done.
        except AttributeError:
            # self.pool is None.
            pass
        except Full:
            # This should never happen if self.block == True
            log.warning("HttpConnectionPool is full, discarding connection: %s",
                        self.host)

        # Connection never got put back into the pool, close it. ``conn`` is
        # None when the caller is returning a placeholder; there is nothing
        # to close then, and closing would raise AttributeError.
        if conn:
            conn.close()

    def _make_request(self, conn, method, url, timeout=_Default,
                      **httplib_request_kw):
        """
        Perform a request on a given httplib connection object taken from our
        pool.

        :param conn: Connection to send the request on.
        :param method: HTTP request method.
        :param url: URL (or path) to request.
        :param timeout:
            Socket timeout for this request; defaults to the pool's timeout.
        :param \\**httplib_request_kw:
            Passed through to ``conn.request`` (e.g. ``body``, ``headers``).
        :returns: The raw response object from ``conn.getresponse()``.
        """
        self.num_requests += 1

        if timeout is _Default:
            timeout = self.timeout

        conn.timeout = timeout  # This only does anything in Py26+
        conn.request(method, url, **httplib_request_kw)

        # Set timeout
        sock = getattr(conn, 'sock', False)  # AppEngine doesn't have sock attr.
        if sock:
            sock.settimeout(timeout)

        try:  # Python 2.7+, use buffering of HTTP responses
            httplib_response = conn.getresponse(buffering=True)
        except TypeError:  # Python 2.6 and older
            httplib_response = conn.getresponse()

        # AppEngine doesn't have a version attr.
        http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
        log.debug("\"%s %s %s\" %s %s", method, url, http_version,
                  httplib_response.status, httplib_response.length)
        return httplib_response

    def close(self):
        """
        Close all pooled connections and disable the pool.

        Calling this on an already closed pool is a no-op.
        """
        # Disable access to the pool
        old_pool, self.pool = self.pool, None

        if old_pool is None:
            # Already closed; there is nothing left to drain.
            return

        try:
            while True:
                conn = old_pool.get(block=False)
                if conn:
                    conn.close()

        except Empty:
            pass  # Done.

    def is_same_host(self, url):
        """
        Check if the given ``url`` is a member of the same host as this
        connection pool.

        Relative URLs (starting with ``/``) always match. A missing port and
        the scheme's default port are treated as equivalent on either side.
        """
        if url.startswith('/'):
            return True

        # TODO: Add optional support for socket.gethostbyname checking.
        scheme, host, port = get_host(url)

        default_port = port_by_scheme.get(scheme)
        if self.port and not port:
            # Use explicit default port for comparison when none is given.
            port = default_port
        elif not self.port and port == default_port:
            # The pool has no explicit port, so a URL that spells out the
            # scheme's default port refers to the same endpoint.
            port = None

        return (scheme, host, port) == (self.scheme, self.host, self.port)

    def urlopen(self, method, url, body=None, headers=None, retries=3,
                redirect=True, assert_same_host=True, timeout=_Default,
                pool_timeout=None, release_conn=None, **response_kw):
        r"""
        Get a connection from the pool and perform an HTTP request. This is the
        lowest level call for making a request, so you'll need to specify all
        the raw details.

        .. note::

           More commonly, it's appropriate to use a convenience method provided
           by :class:`.RequestMethods`, such as :meth:`request`.

        .. note::

           `release_conn` will only behave as expected if
           `preload_content=False` because we want to make
           `preload_content=False` the default behaviour someday soon without
           breaking backwards compatibility.

        :param method:
            HTTP request method (such as GET, POST, PUT, etc.)

        :param body:
            Data to send in the request body (useful for creating
            POST requests, see HTTPConnectionPool.post_url for
            more convenience).

        :param headers:
            Dictionary of custom headers to send, such as User-Agent,
            If-None-Match, etc. If None, pool headers are used. If provided,
            these headers completely replace any pool-specific headers.

        :param retries:
            Number of retries to allow before raising a MaxRetryError exception.

        :param redirect:
            If True, automatically handle redirects (status codes 301, 302,
            303, 307). Each redirect counts as a retry.

        :param assert_same_host:
            If ``True``, will make sure that the host of the pool requests is
            consistent else will raise HostChangedError. When False, you can
            use the pool on an HTTP proxy and request foreign hosts.

        :param timeout:
            If specified, overrides the default timeout for this one request.

        :param pool_timeout:
            If set and the pool is set to block=True, then this method will
            block for ``pool_timeout`` seconds and raise EmptyPoolError if no
            connection is available within the time period.

        :param release_conn:
            If False, then the urlopen call will not release the connection
            back into the pool once a response is received (but will release if
            you read the entire contents of the response such as when
            `preload_content=True`). This is useful if you're not preloading
            the response's content immediately. You will need to call
            ``r.release_conn()`` on the response ``r`` to return the connection
            back into the pool. If None, it takes the value of
            ``response_kw.get('preload_content', True)``.

        :param \**response_kw:
            Additional parameters are passed to
            :meth:`urllib3.response.HTTPResponse.from_httplib`
        """
        if headers is None:
            headers = self.headers

        if retries < 0:
            raise MaxRetryError(self, url)

        if timeout is _Default:
            timeout = self.timeout

        if release_conn is None:
            release_conn = response_kw.get('preload_content', True)

        # Check host
        if assert_same_host and not self.is_same_host(url):
            raise HostChangedError(self, url, retries - 1)

        conn = None
        # Cleared when the pool had no free slot to hand out, so ``finally``
        # does not "return" a slot that was never taken.
        slot_taken = True

        try:
            # Request a connection from the queue
            conn = self._get_conn(timeout=pool_timeout)

            # Make the request on the httplib connection object
            httplib_response = self._make_request(conn, method, url,
                                                  timeout=timeout,
                                                  body=body, headers=headers)

            # If we're going to release the connection in ``finally:``, then
            # the request doesn't need to know about the connection. Otherwise
            # it will also try to release it and we'll have a double-release
            # mess.
            response_conn = not release_conn and conn

            # Import httplib's response into our own wrapper object
            response = HTTPResponse.from_httplib(httplib_response,
                                                 pool=self,
                                                 connection=response_conn,
                                                 **response_kw)

            # else:
            #     The connection will be put back into the pool when
            #     ``response.release_conn()`` is called (implicitly by
            #     ``response.read()``)

        except EmptyPoolError:
            # Nothing was taken from the pool. Putting a ``None`` placeholder
            # back would let a blocking pool grow beyond ``maxsize``.
            slot_taken = False
            raise

        except Empty:
            # Timed out by queue
            raise TimeoutError(self, url,
                               "Request timed out. (pool_timeout=%s)" %
                               pool_timeout)

        except SocketTimeout:
            # Timed out by socket
            raise TimeoutError(self, url,
                               "Request timed out. (timeout=%s)" %
                               timeout)

        except BaseSSLError as e:
            # SSL certificate error
            raise SSLError(e)

        except CertificateError as e:
            # Name mismatch
            raise SSLError(e)

        except (HTTPException, SocketError) as e:
            # Connection broken, discard. It will be replaced next _get_conn().
            conn = None
            # This is necessary so we can access e below (Python 3 unbinds
            # ``e`` when the except block ends).
            err = e

            if retries == 0:
                raise MaxRetryError(self, url, e)

        finally:
            if release_conn and slot_taken:
                # Put the connection back to be reused. If the connection is
                # expired then it will be None, which will get replaced with a
                # fresh connection during _get_conn.
                self._put_conn(conn)

        if not conn:
            # Try again
            log.warning("Retrying (%d attempts remain) after connection "
                        "broken by '%r': %s", retries, err, url)
            return self.urlopen(method, url, body, headers, retries - 1,
                                redirect, assert_same_host,
                                timeout=timeout, pool_timeout=pool_timeout,
                                release_conn=release_conn, **response_kw)

        # Handle redirect?
        redirect_location = redirect and response.get_redirect_location()
        if redirect_location:
            if response.status == 303:
                method = 'GET'
            log.info("Redirecting %s -> %s", url, redirect_location)
            return self.urlopen(method, redirect_location, body, headers,
                                retries - 1, redirect, assert_same_host,
                                timeout=timeout, pool_timeout=pool_timeout,
                                release_conn=release_conn, **response_kw)

        return response
| 504 |
| 505 |
class HTTPSConnectionPool(HTTPConnectionPool):
    """
    Same as :class:`.HTTPConnectionPool`, but HTTPS.

    When Python is compiled with the :mod:`ssl` module, then
    :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates,
    instead of :class:`httplib.HTTPSConnection`.

    :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``,
    ``assert_hostname`` and ``host`` in this order to verify connections.

    The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs`` and
    ``ssl_version`` are only used if :mod:`ssl` is available and are fed into
    :meth:`urllib3.util.ssl_wrap_socket` to upgrade the connection socket
    into an SSL socket.
    """

    scheme = 'https'

    def __init__(self, host, port=None,
                 strict=False, timeout=None, maxsize=1,
                 block=False, headers=None,
                 key_file=None, cert_file=None, cert_reqs=None,
                 ca_certs=None, ssl_version=None,
                 assert_hostname=None, assert_fingerprint=None):

        HTTPConnectionPool.__init__(self, host, port,
                                    strict, timeout, maxsize,
                                    block, headers)
        self.key_file = key_file
        self.cert_file = cert_file
        self.cert_reqs = cert_reqs
        self.ca_certs = ca_certs
        self.ssl_version = ssl_version
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint

    def _new_conn(self):
        """
        Return a fresh :class:`httplib.HTTPSConnection`.

        :raises SSLError:
            if neither the :mod:`ssl` module nor an ``HTTPSConnection`` class
            is available on this platform.
        """
        self.num_connections += 1
        log.info("Starting new HTTPS connection (%d): %s",
                 self.num_connections, self.host)

        # ``strict`` was removed from the http.client connection classes in
        # Python 3.4 (and was already ignored before that), so only Python 2
        # receives it.
        extra_params = {}
        if not six.PY3:  # Python 2
            extra_params['strict'] = self.strict

        if not ssl:  # Platform-specific: Python compiled without +ssl
            if not HTTPSConnection or HTTPSConnection is object:
                raise SSLError("Can't connect to HTTPS URL because the SSL "
                               "module is not available.")

            return HTTPSConnection(host=self.host,
                                   port=self.port,
                                   **extra_params)

        connection = VerifiedHTTPSConnection(host=self.host,
                                             port=self.port,
                                             **extra_params)
        connection.set_cert(key_file=self.key_file, cert_file=self.cert_file,
                            cert_reqs=self.cert_reqs, ca_certs=self.ca_certs,
                            assert_hostname=self.assert_hostname,
                            assert_fingerprint=self.assert_fingerprint)

        connection.ssl_version = self.ssl_version

        return connection
| 571 |
| 572 |
def connection_from_url(url, **kw):
    r"""
    Build the :class:`.ConnectionPool` that matches the host of ``url``.

    Saves the caller from splitting the url into scheme, host and port
    before constructing a pool by hand.

    :param url:
        Absolute URL string that must include the scheme. Port is optional.

    :param \**kw:
        Forwarded to the constructor of the selected
        :class:`.ConnectionPool` subclass. Useful for specifying things like
        timeout, maxsize, headers, etc.

    Example: ::

        >>> conn = connection_from_url('http://google.com/')
        >>> r = conn.request('GET', '/')
    """
    scheme, host, port = get_host(url)
    # Only 'https' selects the HTTPS pool; every other scheme falls back to
    # the plain HTTP pool.
    pool_cls = HTTPSConnectionPool if scheme == 'https' else HTTPConnectionPool
    return pool_cls(host, port=port, **kw)
OLD | NEW |