OLD | NEW |
| (Empty) |
1 # urllib3/response.py | |
2 # Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) | |
3 # | |
4 # This module is part of urllib3 and is released under | |
5 # the MIT License: http://www.opensource.org/licenses/mit-license.php | |
6 | |
7 | |
8 import logging | |
9 import zlib | |
10 import io | |
11 | |
12 from .exceptions import DecodeError | |
13 from .packages.six import string_types as basestring, binary_type | |
14 from .util import is_fp_closed | |
15 | |
16 | |
17 log = logging.getLogger(__name__) | |
18 | |
19 | |
class DeflateDecoder(object):
    """
    Incremental decoder for ``deflate``-encoded bodies.

    Input is first fed to a zlib-wrapped decompressor. If that raises
    ``zlib.error`` before any output was produced, everything received so
    far is replayed through a raw-deflate decompressor (negative ``wbits``)
    and that decompressor is used from then on.

    Any attribute not defined here (e.g. ``flush``) is forwarded to the
    underlying zlib decompression object.
    """

    def __init__(self):
        # True while we have not yet settled on zlib-wrapped vs. raw deflate.
        self._first_try = True
        # Input seen so far, kept only while a raw-deflate replay may still
        # be needed; released (set to None) once the format is settled.
        self._data = b''
        self._obj = zlib.decompressobj()

    def __getattr__(self, name):
        # Only reached for names missing on this wrapper: delegate to zlib.
        return getattr(self._obj, name)

    def decompress(self, data):
        """
        Decompress ``data`` and return whatever output is available.

        :param data:
            The next chunk of compressed bytes. Empty input is returned
            unchanged without touching the decoder state.

        :raises zlib.error:
            If the data cannot be decoded by the decompressor in use (after
            the one-time fallback to raw deflate has been attempted).
        """
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            decompressed = self._obj.decompress(data)
        except zlib.error:
            # Not a zlib-wrapped stream: switch to raw deflate and replay
            # everything buffered so far through the new decompressor.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None

        if decompressed:
            # The zlib header was accepted and real output was produced, so
            # a fallback replay can no longer be needed. Stop buffering the
            # input, otherwise the whole compressed body stays in memory.
            self._first_try = False
            self._data = None
        return decompressed
44 | |
45 | |
46 def _get_decoder(mode): | |
47 if mode == 'gzip': | |
48 return zlib.decompressobj(16 + zlib.MAX_WBITS) | |
49 | |
50 return DeflateDecoder() | |
51 | |
52 | |
class HTTPResponse(io.IOBase):
    """
    HTTP Response container.

    Backwards-compatible to httplib's HTTPResponse but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed.

    Extra parameters for behaviour not present in httplib.HTTPResponse:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, the body is decoded according to the 'content-encoding'
        header (for the encodings in ``CONTENT_DECODERS``, i.e. 'gzip' and
        'deflate'). If False, decoding is skipped and raw data is used
        instead.

    :param original_response:
        When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
        object, it's convenient to include the original for debug purposes.
        It is also consulted after each read: once it reports itself closed,
        the connection is released back to the pool.
    """

    CONTENT_DECODERS = ['gzip', 'deflate']
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    def __init__(self, body='', headers=None, status=0, version=0, reason=None,
                 strict=0, preload_content=True, decode_content=True,
                 original_response=None, pool=None, connection=None):
        self.headers = headers or {}
        self.status = status
        self.version = version
        self.reason = reason
        self.strict = strict
        self.decode_content = decode_content

        # Created lazily by read() once the content-encoding is known.
        self._decoder = None
        # A non-empty string body is used as-is; anything else is either a
        # file-like object (handled below) or treated as "no body".
        self._body = body if body and isinstance(body, basestring) else None
        self._fp = None
        self._original_response = original_response

        self._pool = pool
        self._connection = connection

        if hasattr(body, 'read'):
            self._fp = body

        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

    def get_redirect_location(self):
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get('location')

        return False

    def release_conn(self):
        """
        Hand the connection back to the pool it came from.

        Does nothing when either the pool or the connection is unset, so it
        is safe to call more than once.
        """
        if not self._pool or not self._connection:
            return

        self._pool._put_conn(self._connection)
        self._connection = None

    @property
    def data(self):
        # For backwards-compat with urllib3 0.4 and earlier.
        if self._body:
            return self._body

        if self._fp:
            return self.read(cache_content=True)

    def read(self, amt=None, decode_content=None, cache_content=False):
        """
        Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header. If None, the value given to the
            constructor is used.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)

        :returns:
            The (optionally decoded) data, or ``None`` when there is no
            underlying file object to read from.

        :raises DecodeError:
            If decoding or the final flush of the decoder fails.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 2616
        # Section 3.5
        content_encoding = self.headers.get('content-encoding', '').lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
        if decode_content is None:
            decode_content = self.decode_content

        if self._fp is None:
            return

        flush_decoder = False

        try:
            if amt is None:
                # cStringIO doesn't like amt=None
                data = self._fp.read()
                flush_decoder = True
            else:
                cache_content = False
                data = self._fp.read(amt)
                if amt != 0 and not data:  # Platform-specific: Buggy versions of Python.
                    # Close the connection when no data is returned
                    #
                    # This is redundant to what httplib/http.client _should_
                    # already do. However, versions of python released before
                    # December 15, 2012 (http://bugs.python.org/issue16298) do not
                    # properly close the connection in all cases. There is no harm
                    # in redundantly calling close.
                    self._fp.close()
                    flush_decoder = True

            if decode_content and self._decoder:
                try:
                    data = self._decoder.decompress(data)
                    if flush_decoder:
                        # End of stream: drain whatever the decoder still
                        # holds. Kept inside the try so that a failure here
                        # is reported as DecodeError like any other decoding
                        # problem, rather than leaking a raw zlib.error.
                        buf = self._decoder.decompress(b'')
                        data += buf + self._decoder.flush()
                except (IOError, zlib.error) as e:
                    raise DecodeError(
                        "Received response with content-encoding: %s, but "
                        "failed to decode it." % content_encoding,
                        e)

            if cache_content:
                self._body = data

            return data

        finally:
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def stream(self, amt=2**16, decode_content=None):
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            this much data per iteration, but may return less. This is
            particularly likely when using compressed data. However, the empty
            string will never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        while not is_fp_closed(self._fp):
            data = self.read(amt=amt, decode_content=decode_content)

            if data:
                yield data

    @classmethod
    def from_httplib(ResponseCls, r, **response_kw):
        """
        Given an :class:`httplib.HTTPResponse` instance ``r``, return a
        corresponding :class:`urllib3.response.HTTPResponse` object.

        Remaining parameters are passed to the HTTPResponse constructor, along
        with ``original_response=r``.
        """

        # Normalize headers between different versions of Python
        headers = {}
        for k, v in r.getheaders():
            # Python 3: Header keys are returned capitalised
            k = k.lower()

            has_value = headers.get(k)
            if has_value:  # Python 3: Repeating header keys are unmerged.
                v = ', '.join([has_value, v])

            headers[k] = v

        # HTTPResponse objects in Python 3 don't have a .strict attribute
        strict = getattr(r, 'strict', 0)
        return ResponseCls(body=r,
                           headers=headers,
                           status=r.status,
                           version=r.version,
                           reason=r.reason,
                           strict=strict,
                           original_response=r,
                           **response_kw)

    # Backwards-compatibility methods for httplib.HTTPResponse
    def getheaders(self):
        """Return the headers mapping itself (not a copy)."""
        return self.headers

    def getheader(self, name, default=None):
        """Return the header ``name``, or ``default`` if it is not present."""
        return self.headers.get(name, default)

    # Overrides from io.IOBase
    def close(self):
        """Close the underlying file object unless it is already closed."""
        if not self.closed:
            self._fp.close()

    @property
    def closed(self):
        # With no file object, or one that exposes neither ``closed`` nor
        # ``isclosed()``, the response is reported as closed.
        if self._fp is None:
            return True
        elif hasattr(self._fp, 'closed'):
            return self._fp.closed
        elif hasattr(self._fp, 'isclosed'):  # Python 2
            return self._fp.isclosed()
        else:
            return True

    def fileno(self):
        """
        Return the file descriptor of the underlying file object.

        :raises IOError:
            If there is no underlying file object, or it has no ``fileno``.
        """
        if self._fp is None:
            raise IOError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise IOError("The file-like object this HTTPResponse is wrapped "
                          "around has no file descriptor")

    def flush(self):
        """Flush the underlying file object if it supports flushing."""
        if self._fp is not None and hasattr(self._fp, 'flush'):
            return self._fp.flush()

    def readable(self):
        """Always True: the response advertises itself as readable."""
        return True
OLD | NEW |