OLD | NEW |
(Empty) | |
| 1 from __future__ import absolute_import |
| 2 import collections |
| 3 import functools |
| 4 import logging |
| 5 |
| 6 from ._collections import RecentlyUsedContainer |
| 7 from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool |
| 8 from .connectionpool import port_by_scheme |
| 9 from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown |
| 10 from .packages.six.moves.urllib.parse import urljoin |
| 11 from .request import RequestMethods |
| 12 from .util.url import parse_url |
| 13 from .util.retry import Retry |
| 14 |
| 15 |
# Public names of this module (what ``from <module> import *`` exposes).
__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']


# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
| 20 |
# Names of the SSL-related connection pool keyword arguments. They are part
# of the HTTPS pool key and are stripped from the keyword arguments when a
# plain-HTTP pool is created.
SSL_KEYWORDS = (
    'key_file',
    'cert_file',
    'cert_reqs',
    'ca_certs',
    'ssl_version',
    'ca_cert_dir',
    'ssl_context',
)

# Minimal pool key: everything here can be derived from the URL alone (plus,
# for the port, possibly the ``urllib3.connection.port_by_scheme`` mapping),
# without consulting ``connection_pool_kw``.
#
# Any custom key scheme should contain at least these fields.
BasePoolKey = collections.namedtuple('BasePoolKey', 'scheme host port')

# Pool keys for HTTP and HTTPS connections. Every field beyond the base ones
# must be present in the PoolManager's ``connection_pool_kw`` instance
# variable.
HTTPPoolKey = collections.namedtuple(
    'HTTPPoolKey',
    BasePoolKey._fields + (
        'timeout',
        'retries',
        'strict',
        'block',
        'source_address',
    ),
)
HTTPSPoolKey = collections.namedtuple(
    'HTTPSPoolKey',
    HTTPPoolKey._fields + SSL_KEYWORDS,
)
| 41 |
| 42 |
def _default_key_normalizer(key_class, request_context):
    """
    Build a pool key of type ``key_class`` from a request context.

    RFC 3986 treats both the scheme and the host as case-insensitive, so
    both are lower-cased before the key is constructed. To get different
    behaviour, register alternate callables in ``key_fn_by_scheme``.

    :param key_class:
        The namedtuple class to instantiate. It must have at least the
        ``scheme`` and ``host`` fields.

    :param request_context:
        A dictionary-like object describing the request. Each field of
        ``key_class`` is looked up with ``.get()``, so a field missing from
        the context ends up as ``None`` in the key.

    :return:
        An instance of ``key_class``.
    """
    fields = dict(
        (name, request_context.get(name)) for name in key_class._fields
    )
    # Normalize the case-insensitive components (scheme first, then host).
    for name in ('scheme', 'host'):
        fields[name] = fields[name].lower()
    return key_class(**fields)
| 66 |
| 67 |
# Maps a URL scheme to the callable that builds the pool key for it.
# Replace entries to change how pool keys are constructed. Every PoolManager
# takes its own copy of this dictionary, so it can be configured globally
# here or per instance on the manager.
key_fn_by_scheme = dict(
    http=functools.partial(_default_key_normalizer, HTTPPoolKey),
    https=functools.partial(_default_key_normalizer, HTTPSPoolKey),
)

# Maps a URL scheme to the connection pool class used for it.
pool_classes_by_scheme = dict(
    http=HTTPConnectionPool,
    https=HTTPSConnectionPool,
)
| 81 |
| 82 |
class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    # No proxy by default; ``urlopen`` checks this attribute to decide
    # whether to send the absolute URL.
    proxy = None

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw
        # Pools evicted from the container are closed via ``dispose_func``.
        self.pools = RecentlyUsedContainer(num_pools,
                                           dispose_func=lambda p: p.close())

        # Locally set the pool classes and keys so other PoolManagers can
        # override them. Both mappings are copied so that item assignment on
        # one manager does not leak into the module-level defaults or into
        # other managers.
        self.pool_classes_by_scheme = pool_classes_by_scheme.copy()
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(self, scheme, host, port):
        """
        Create a new :class:`ConnectionPool` based on host, port and scheme.

        This method is used to actually create the connection pools handed out
        by :meth:`connection_from_url` and companion methods. It is intended
        to be overridden for customization.

        :param scheme:
            Key into ``self.pool_classes_by_scheme`` selecting the pool class.
        :param host:
            Host passed to the pool constructor.
        :param port:
            Port passed to the pool constructor.
        """
        pool_cls = self.pool_classes_by_scheme[scheme]
        kwargs = self.connection_pool_kw
        if scheme == 'http':
            # Plain-HTTP pools are not given the SSL keywords; work on a copy
            # so the manager's own keyword arguments stay untouched.
            kwargs = self.connection_pool_kw.copy()
            for kw in SSL_KEYWORDS:
                kwargs.pop(kw, None)

        return pool_cls(host, port, **kwargs)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme='http'):
        """
        Get a :class:`ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme`` (falling back to 80 for
        schemes that are not listed there).

        :raises LocationValueError:
            If ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        # Start from the manager-wide pool keywords, then add the
        # per-request location fields.
        request_context = self.connection_pool_kw.copy()
        request_context['scheme'] = scheme or 'http'
        if not port:
            port = port_by_scheme.get(request_context['scheme'].lower(), 80)
        request_context['port'] = port
        request_context['host'] = host

        return self.connection_from_context(request_context)

    def connection_from_context(self, request_context):
        """
        Get a :class:`ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.
        """
        scheme = request_context['scheme'].lower()
        pool_key_constructor = self.key_fn_by_scheme[scheme]
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key)

    def connection_from_pool_key(self, pool_key):
        """
        Get a :class:`ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.
        """
        # The lookup and the insertion happen under the container's lock.
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            pool = self._new_pool(pool_key.scheme, pool_key.host,
                                  pool_key.port)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(self, url):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url` but
        doesn't pass any additional parameters to the
        :class:`urllib3.connectionpool.ConnectionPool` constructor.

        Additional parameters are taken from the :class:`.PoolManager`
        constructor.
        """
        u = parse_url(url)
        return self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        :param method:
            HTTP method; replaced by ``'GET'`` when following a 303 response.
        :param url:
            Absolute URL to request.
        :param redirect:
            When true, redirect responses are followed by calling this
            method again with the redirect location.
        :param \\**kw:
            Passed on to the pool's ``urlopen``. ``assert_same_host`` and
            ``redirect`` are overridden, and ``headers`` defaults to
            ``self.headers``.
        """
        u = parse_url(url)
        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are followed here, not by the individual pool, so the
        # pool is told not to redirect and not to enforce same-host.
        kw['assert_same_host'] = False
        kw['redirect'] = False
        if 'headers' not in kw:
            kw['headers'] = self.headers

        if self.proxy is not None and u.scheme == "http":
            # With a proxy configured, plain-HTTP requests are issued with
            # the full absolute URL rather than just the request-uri.
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        # RFC 7231, Section 6.4.4
        if response.status == 303:
            method = 'GET'

        retries = kw.get('retries')
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        try:
            retries = retries.increment(method, url, response=response,
                                        _pool=conn)
        except MaxRetryError:
            # ``retries`` is still the pre-increment object here because the
            # assignment above did not complete.
            if retries.raise_on_redirect:
                raise
            return response

        kw['retries'] = retries
        kw['redirect'] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)
        return self.urlopen(method, redirect_location, **kw)
| 273 |
| 274 |
class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In
        case of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    Example:
        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(self, proxy_url, num_pools=10, headers=None,
                 proxy_headers=None, **connection_pool_kw):

        # An HTTPConnectionPool instance is accepted in place of a URL
        # string; it is turned into the equivalent URL first.
        if isinstance(proxy_url, HTTPConnectionPool):
            location = (proxy_url.scheme, proxy_url.host, proxy_url.port)
            proxy_url = '%s://%s:%i' % location

        parsed = parse_url(proxy_url)
        if not parsed.port:
            # Fill in the scheme's default port, or 80 if the scheme has
            # no entry in ``port_by_scheme``.
            default_port = port_by_scheme.get(parsed.scheme, 80)
            parsed = parsed._replace(port=default_port)

        if parsed.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(parsed.scheme)

        self.proxy = parsed
        self.proxy_headers = proxy_headers or {}

        # The proxy settings are passed to every pool through the pool
        # keyword arguments.
        connection_pool_kw.update(
            _proxy=self.proxy,
            _proxy_headers=self.proxy_headers,
        )

        super(ProxyManager, self).__init__(
            num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme='http'):
        """
        Get a pool for the target when ``scheme`` is ``"https"``; for any
        other scheme, get the pool for the proxy's own host, port and scheme.
        """
        lookup = super(ProxyManager, self).connection_from_host
        if scheme == "https":
            return lookup(host, port, scheme)

        return lookup(self.proxy.host, self.proxy.port, self.proxy.scheme)

    def _set_proxy_headers(self, url, headers=None):
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        Returns a new dictionary; ``headers`` itself is not modified.
        """
        merged = {'Accept': '*/*'}

        target = parse_url(url).netloc
        if target:
            merged['Host'] = target

        # User-supplied values are applied last so they win over defaults.
        if headers:
            merged.update(headers)
        return merged

    def urlopen(self, method, url, redirect=True, **kw):
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        if parse_url(url).scheme == "http":
            # For proxied HTTPS requests, httplib sets the necessary headers
            # on the CONNECT to the proxy. For HTTP, we'll definitely
            # need to set 'Host' at the very least.
            supplied = kw.get('headers', self.headers)
            kw['headers'] = self._set_proxy_headers(url, supplied)

        parent = super(ProxyManager, self)
        return parent.urlopen(method, url, redirect=redirect, **kw)
| 360 |
| 361 |
def proxy_from_url(url, **kw):
    """
    Create a :class:`ProxyManager` for the proxy at ``url``.

    :param url:
        Passed to :class:`ProxyManager` as ``proxy_url``.

    :param \\**kw:
        Forwarded unchanged to the :class:`ProxyManager` constructor.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager
OLD | NEW |