OLD | NEW |
| (Empty) |
1 # urllib3/poolmanager.py | |
2 # Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) | |
3 # | |
4 # This module is part of urllib3 and is released under | |
5 # the MIT License: http://www.opensource.org/licenses/mit-license.php | |
6 | |
7 import logging | |
8 | |
9 try: # Python 3 | |
10 from urllib.parse import urljoin | |
11 except ImportError: | |
12 from urlparse import urljoin | |
13 | |
14 from ._collections import RecentlyUsedContainer | |
15 from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool | |
16 from .connectionpool import port_by_scheme | |
17 from .request import RequestMethods | |
18 from .util import parse_url | |
19 | |
20 | |
# Names exported as this module's public API.
__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]


# Connection pool class to instantiate for each supported URL scheme.
pool_classes_by_scheme = dict(
    http=HTTPConnectionPool,
    https=HTTPSConnectionPool,
)

# Module-level logger, named after this module.
log = logging.getLogger(__name__)
30 | |
# Keyword arguments that PoolManager._new_pool removes before building a
# pool for the 'http' scheme.
SSL_KEYWORDS = (
    'key_file',
    'cert_file',
    'cert_reqs',
    'ca_certs',
    'ssl_version',
)
33 | |
34 | |
class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example: ::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    # No proxy by default; ProxyManager assigns a parsed proxy URL here.
    proxy = None

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        """
        Store the default headers and pool keyword arguments, and create the
        container that caches up to ``num_pools`` pools.
        """
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw
        # Pools dropped from the container are closed via dispose_func.
        self.pools = RecentlyUsedContainer(num_pools,
                                           dispose_func=lambda p: p.close())

    def _new_pool(self, scheme, host, port):
        """
        Create a new :class:`ConnectionPool` based on host, port and scheme.

        This method is used to actually create the connection pools handed out
        by :meth:`connection_from_url` and companion methods. It is intended
        to be overridden for customization.

        A ``scheme`` missing from ``pool_classes_by_scheme`` raises
        ``KeyError``.
        """
        pool_cls = pool_classes_by_scheme[scheme]
        kwargs = self.connection_pool_kw
        if scheme == 'http':
            # Work on a copy so the manager's stored keywords stay intact,
            # and drop the SSL-only keywords for plain-HTTP pools.
            kwargs = self.connection_pool_kw.copy()
            for kw in SSL_KEYWORDS:
                kwargs.pop(kw, None)

        return pool_cls(host, port, **kwargs)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme='http'):
        """
        Get a :class:`ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``.
        """

        # A falsy scheme (e.g. None) is treated as plain HTTP.
        scheme = scheme or 'http'

        port = port or port_by_scheme.get(scheme, 80)

        pool_key = (scheme, host, port)

        # Lookup and insertion both happen while holding the container's
        # lock.
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            pool = self._new_pool(scheme, host, port)
            self.pools[pool_key] = pool
        return pool

    def connection_from_url(self, url):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url` but
        doesn't pass any additional parameters to the
        :class:`urllib3.connectionpool.ConnectionPool` constructor.

        Additional parameters are taken from the :class:`.PoolManager`
        constructor.
        """
        u = parse_url(url)
        return self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
        """
        u = parse_url(url)
        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are followed below, so the pool is told not to follow
        # them and not to enforce same-host requests.
        kw['assert_same_host'] = False
        kw['redirect'] = False
        if 'headers' not in kw:
            kw['headers'] = self.headers

        if self.proxy is not None and u.scheme == "http":
            # With a proxy configured, plain-HTTP requests carry the full
            # absolute URL.
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        # RFC 2616, Section 10.3.4
        if response.status == 303:
            method = 'GET'

        # Pass the values as logger arguments so the message is only
        # formatted when INFO logging is enabled.
        log.info("Redirecting %s -> %s", url, redirect_location)
        kw['retries'] = kw.get('retries', 3) - 1  # Persist retries countdown
        kw['redirect'] = redirect
        return self.urlopen(method, redirect_location, **kw)
172 | |
173 | |
class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In
        case of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    Example: ::

        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(self, proxy_url, num_pools=10, headers=None,
                 proxy_headers=None, **connection_pool_kw):
        """
        Parse and store the proxy location, then initialise the pool manager.

        ``proxy_url`` may also be an :class:`HTTPConnectionPool`, in which
        case a URL is built from its scheme, host and port.

        Raises ``AssertionError`` if the proxy scheme is neither ``http`` nor
        ``https``.
        """

        if isinstance(proxy_url, HTTPConnectionPool):
            proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host,
                                        proxy_url.port)
        proxy = parse_url(proxy_url)
        if not proxy.port:
            # Fall back to the scheme's default port (80 if unknown).
            port = port_by_scheme.get(proxy.scheme, 80)
            proxy = proxy._replace(port=port)
        self.proxy = proxy
        self.proxy_headers = proxy_headers or {}
        # Raised explicitly instead of using an ``assert`` statement so the
        # check is not removed when Python runs with -O; the exception type
        # and message are unchanged.
        if self.proxy.scheme not in ("http", "https"):
            raise AssertionError(
                'Not supported proxy scheme %s' % self.proxy.scheme)
        # Every pool created by this manager receives the proxy settings.
        connection_pool_kw['_proxy'] = self.proxy
        connection_pool_kw['_proxy_headers'] = self.proxy_headers
        super(ProxyManager, self).__init__(
            num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme='http'):
        """
        Get a pool for the request.

        For the ``https`` scheme the pool is keyed on the target host, port
        and scheme; for anything else the pool keyed on the proxy's own
        host, port and scheme is returned.
        """
        if scheme == "https":
            return super(ProxyManager, self).connection_from_host(
                host, port, scheme)

        return super(ProxyManager, self).connection_from_host(
            self.proxy.host, self.proxy.port, self.proxy.scheme)

    def _set_proxy_headers(self, url, headers=None):
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        Returns a new dictionary; ``headers`` itself is not modified.
        """
        headers_ = {'Accept': '*/*'}

        netloc = parse_url(url).netloc
        if netloc:
            headers_['Host'] = netloc

        # User-supplied values override the defaults set above.
        if headers:
            headers_.update(headers)
        return headers_

    def urlopen(self, method, url, redirect=True, **kw):
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        u = parse_url(url)

        if u.scheme == "http":
            # It's too late to set proxy headers on per-request basis for
            # tunnelled HTTPS connections, should use
            # constructor's proxy_headers instead.
            kw['headers'] = self._set_proxy_headers(url, kw.get('headers',
                                                                self.headers))
            # _set_proxy_headers returned a fresh dict, so this update does
            # not touch the caller's headers or self.headers.
            kw['headers'].update(self.proxy_headers)

        return super(ProxyManager, self).urlopen(method, url, redirect, **kw)
256 | |
257 | |
def proxy_from_url(url, **kw):
    """
    Build a :class:`ProxyManager` that uses ``url`` as its proxy URL.

    Any extra keyword arguments are passed through to the
    :class:`ProxyManager` constructor.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager
OLD | NEW |