| OLD | NEW |
| (Empty) | |
| 1 # Copyright 2014 Google Inc. All Rights Reserved. |
| 2 # |
| 3 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 # you may not use this file except in compliance with the License. |
| 5 # You may obtain a copy of the License at |
| 6 # |
| 7 # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 # |
| 9 # Unless required by applicable law or agreed to in writing, software |
| 10 # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 # See the License for the specific language governing permissions and |
| 13 # limitations under the License. |
| 14 """HTTP wrapper for apitools. |
| 15 |
| 16 This library wraps the underlying http library we use, which is |
| 17 currently httplib2. |
| 18 """ |
| 19 |
| 20 import collections |
| 21 import contextlib |
| 22 import httplib |
| 23 import logging |
| 24 import socket |
| 25 import time |
| 26 import urlparse |
| 27 |
| 28 import httplib2 |
| 29 |
| 30 from gslib.third_party.storage_apitools import exceptions |
| 31 from gslib.third_party.storage_apitools import util |
| 32 |
# Names exported by a star-import of this module.
__all__ = [
    'GetHttp',
    'MakeRequest',
]


# 308 and 429 don't have names in httplib.
RESUME_INCOMPLETE = 308
TOO_MANY_REQUESTS = 429
# Status codes that Response.is_redirect accepts as redirects (it also
# requires a 'location' header to be present).
_REDIRECT_STATUS_CODES = (
    httplib.MOVED_PERMANENTLY,
    httplib.FOUND,
    httplib.SEE_OTHER,
    httplib.TEMPORARY_REDIRECT,
    RESUME_INCOMPLETE,
)
| 49 |
# State handed to a retry handler after a failed request attempt:
#   http: An httplib2.Http instance.
#   http_request: A http_wrapper.Request.
#   exc: Exception being raised.
#   num_retries: Number of retries consumed; used for exponential backoff.
ExceptionRetryArgs = collections.namedtuple(
    'ExceptionRetryArgs', 'http http_request exc num_retries')
| 57 |
| 58 |
@contextlib.contextmanager
def _Httplib2Debuglevel(http_request, level, http=None):
  """Temporarily change the value of httplib2.debuglevel if needed.

  If http_request.loggable_body is None this is a no-op. Otherwise the
  request carries a stand-in for its body, and we need to prevent httplib2
  from logging the full body. This sets httplib2.debuglevel for the
  duration of the `with` block; however, that alone won't change the value
  of existing HTTP connections. If an httplib2.Http object is provided,
  we'll also change the level on any cached connections attached to it.

  All previous levels are restored when the block exits, including when
  the block raises.

  Args:
    http_request: The Request about to be sent; only its loggable_body is
        inspected.
    level: Debug level to apply inside the `with` block.
    http: Optional httplib2.Http whose cached connections should also be
        switched to `level`.

  Yields:
    None.
  """
  if http_request.loggable_body is None:
    # No stand-in body was set, so leave httplib2's logging untouched.
    yield
    return
  saved_global_level = httplib2.debuglevel
  saved_connection_levels = {}
  httplib2.debuglevel = level
  try:
    if http is not None:
      for connection_key, connection in http.connections.items():
        # httplib2 stores two kinds of values in this dict, connection
        # classes and instances. Since the connection types are all
        # old-style classes, we can't easily distinguish by connection
        # type -- so instead we use the key pattern.
        if ':' not in connection_key:
          continue
        saved_connection_levels[connection_key] = connection.debuglevel
        connection.set_debuglevel(level)
    yield
  finally:
    # Restore in a finally clause so a failing request cannot leave the
    # temporary level in place.
    httplib2.debuglevel = saved_global_level
    if http is not None:
      # Iterate (key, level) pairs; iterating the dict itself would only
      # produce the keys.
      for connection_key, saved_level in saved_connection_levels.items():
        # The connection may have been dropped while the block ran.
        if connection_key in http.connections:
          http.connections[connection_key].set_debuglevel(saved_level)
| 92 |
| 93 |
try:
  _STRING_TYPES = (basestring,)
except NameError:
  # Interpreters without `basestring` only have `str` as the text type.
  _STRING_TYPES = (str,)


class Request(object):
  """Class encapsulating the data for an HTTP request.

  Attributes:
    url: The URL to request.
    http_method: The HTTP method name, e.g. 'GET'.
    headers: Dict of request headers. The 'content-length' entry is
        maintained by the `body` setter.
    body: The request body, or None for no body.
    loggable_body: Stand-in for the body to be used when logging, or None
        if no stand-in has been set.
  """

  def __init__(self, url='', http_method='GET', headers=None, body=''):
    self.url = url
    self.http_method = http_method
    self.headers = headers or {}
    self.__body = None
    self.__loggable_body = None
    # Assign through the property so content-length and loggable_body are
    # derived from the initial body.
    self.body = body

  @property
  def loggable_body(self):
    return self.__loggable_body

  @loggable_body.setter
  def loggable_body(self, value):
    if self.body is None:
      raise exceptions.RequestError(
          'Cannot set loggable body on request with no body')
    self.__loggable_body = value

  @property
  def body(self):
    return self.__body

  @body.setter
  def body(self, value):
    """Set the body and keep the content-length header in sync with it."""
    self.__body = value
    if value is not None:
      self.headers['content-length'] = str(len(self.__body))
    else:
      self.headers.pop('content-length', None)
    # This line ensures we don't try to print large requests. A None body
    # is skipped: there is nothing to hide, and the loggable_body setter
    # rejects requests that have no body.
    if value is not None and not isinstance(value, _STRING_TYPES):
      self.loggable_body = '<media body>'
| 130 |
| 131 |
# Note: the field order is significant -- it lets a Response be built
# directly from the result of httplib2.request.
class Response(collections.namedtuple(
    'HttpResponse', ['info', 'content', 'request_url'])):
  """An HTTP response: header info, content, and the requested URL."""
  __slots__ = ()

  def __len__(self):
    return self.length

  @property
  def length(self):
    """Return the length of this response.

    This is available as an attribute because calling len() directly can
    fail for responses larger than sys.maxint.

    Returns:
      Response length (as int or long)
    """
    def _SpanOfContentRange(header_value):
      # Header looks like '<unit> <first>-<last>/<total>'; the span is
      # inclusive at both ends.
      range_spec = header_value.partition(' ')[2]
      byte_range = range_spec.partition('/')[0]
      first, _, last = byte_range.partition('-')
      return int(last) - int(first) + 1

    headers = self.info
    has_content_range = 'content-range' in headers
    if has_content_range and '-content-encoding' in headers:
      # httplib2 rewrites content-length for a compressed transfer, so
      # that header can't be trusted here; content-range still can.
      return _SpanOfContentRange(headers['content-range'])
    if 'content-length' in headers:
      return int(headers.get('content-length'))
    if has_content_range:
      return _SpanOfContentRange(headers['content-range'])
    return len(self.content)

  @property
  def status_code(self):
    return int(self.info['status'])

  @property
  def retry_after(self):
    if 'retry-after' not in self.info:
      return None
    return int(self.info['retry-after'])

  @property
  def is_redirect(self):
    if self.status_code not in _REDIRECT_STATUS_CODES:
      return False
    return 'location' in self.info
| 182 |
| 183 |
def CheckResponse(response):
  """Raise an exception if the response should not be accepted as-is.

  Args:
    response: The Response to validate, or None.

  Raises:
    exceptions.RequestError: if response is None.
    exceptions.BadStatusCodeError: if the status code is 5xx, 429
        (TOO_MANY_REQUESTS) or 401 (UNAUTHORIZED).
    exceptions.RetryAfterError: if the response carries a non-zero
        retry-after value.
  """
  if response is None:
    # Caller shouldn't call us if the response is None, but handle anyway.
    # There is no response to read a URL from, so the message omits it.
    raise exceptions.RequestError('Request did not return a response.')
  elif (response.status_code >= 500 or
        response.status_code == TOO_MANY_REQUESTS):
    raise exceptions.BadStatusCodeError.FromResponse(response)
  elif response.status_code == httplib.UNAUTHORIZED:
    # Sometimes we get a 401 after a connection break.
    # TODO: this shouldn't be a retryable exception, but for now we retry.
    raise exceptions.BadStatusCodeError.FromResponse(response)
  elif response.retry_after:
    raise exceptions.RetryAfterError.FromResponse(response)
| 198 |
| 199 |
def RebuildHttpConnections(http):
  """Rebuilds all http connections in the httplib2.Http instance.

  httplib2 overloads the map in http.connections to contain two different
  types of values:
  { scheme string:  connection class } and
  { scheme + authority string : actual http connection }
  Here we remove all of the entries for actual connections so that on the
  next request httplib2 will rebuild them from the connection types.

  Objects without a `connections` attribute, or with an empty one, are
  left untouched.

  Args:
    http: An httplib2.Http instance.
  """
  connections = getattr(http, 'connections', None)
  if not connections:
    return
  # Loop over a snapshot of the keys: entries are deleted inside the loop,
  # and a dict must not change size while it is being iterated directly.
  for conn_key in list(connections):
    if ':' in conn_key:
      del connections[conn_key]
| 217 |
| 218 |
def RethrowExceptionHandler(*unused_args):
  """Retry handler that never retries: it re-raises the active exception.

  The body is a bare `raise`, so this must be called while an exception is
  being handled (MakeRequest calls its retry_func from inside an `except`
  block).

  Args:
    *unused_args: Ignored.
  """
  raise
| 221 |
| 222 |
def HandleExceptionsAndRebuildHttpConnections(retry_args):
  """Log a recognized request failure, reset connections, then wait.

  Recognized exception types are logged. Anything else is re-raised with a
  bare `raise`, so this must be called while that exception is being
  handled. After a recognized failure, RebuildHttpConnections is applied
  to retry_args.http and the call sleeps before returning.

  Args:
    retry_args: An ExceptionRetryArgs tuple.
  """
  failure = retry_args.exc
  # Set only when the failure itself carries a wait time.
  requested_wait = None
  if isinstance(failure, httplib.BadStatusLine):
    logging.error(
        'Caught BadStatusLine from httplib, retrying: %s', failure)
  elif isinstance(failure, socket.error):
    logging.error('Caught socket error, retrying: %s', failure)
  elif isinstance(failure, exceptions.BadStatusCodeError):
    logging.error(
        'Response returned status %s, retrying', failure.status_code)
  elif isinstance(failure, exceptions.RetryAfterError):
    logging.error('Response returned a retry-after header, retrying')
    requested_wait = failure.retry_after
  elif isinstance(failure, ValueError):
    # oauth2_client JSON-decodes the response, and an invalid response
    # surfaces as a ValueError. Handle it here until oauth2_client does.
    logging.error(
        'Response content was invalid (%s), retrying', failure)
  elif isinstance(failure, exceptions.RequestError):
    logging.error('Request returned no response, retrying')
  else:
    raise

  RebuildHttpConnections(retry_args.http)
  logging.error('Retrying request to url %s after exception %s',
                retry_args.http_request.url, failure)
  if requested_wait:
    delay = requested_wait
  else:
    delay = util.CalculateWaitForRetry(retry_args.num_retries)
  time.sleep(delay)
| 257 |
| 258 |
def MakeRequest(http, http_request, retries=7, redirections=5,
                retry_func=HandleExceptionsAndRebuildHttpConnections,
                check_response_func=CheckResponse):
  """Send http_request via the given http, performing error/retry handling.

  Args:
    http: An httplib2.Http instance, or a http multiplexer that delegates to
        an underlying http, for example, HTTPMultiplexer.
    http_request: A Request to send.
    retries: (int, default 7) Failure budget. Once this many attempts have
        failed, the latest exception is re-raised instead of being passed
        to retry_func.
    redirections: (int, default 5) Number of redirects to follow.
    retry_func: Function called after a failed attempt, with a single
        ExceptionRetryArgs argument. It is expected to re-raise exceptions
        it does not handle.
    check_response_func: Function to validate the HTTP response. It is
        called with the Response as its only argument.

  Returns:
    A Response object.
  """
  failed_attempts = 0
  while True:
    try:
      return _MakeRequestNoRetry(
          http, http_request, redirections=redirections,
          check_response_func=check_response_func)
    # Everything is caught here; retry_func decides what is retryable and
    # re-raises the rest.
    # pylint: disable=broad-except
    except Exception as err:
      failed_attempts += 1
      if failed_attempts >= retries:
        raise
      retry_func(ExceptionRetryArgs(
          http=http, http_request=http_request, exc=err,
          num_retries=failed_attempts))
| 291 |
| 292 |
def _MakeRequestNoRetry(http, http_request, redirections=5,
                        check_response_func=CheckResponse):
  """Send http_request via the given http, with no retry handling.

  Translates between the plain httplib2 request/response types and the
  Request and Response types defined in this module.

  Args:
    http: An httplib2.Http instance, or a http multiplexer that delegates to
        an underlying http, for example, HTTPMultiplexer.
    http_request: A Request to send.
    redirections: (int, default 5) Number of redirects to follow.
    check_response_func: Function to validate the HTTP response. It is
        called with the Response as its only argument.

  Returns:
    Response object.

  Raises:
    RequestError if no response could be parsed.
  """
  # Look up the connection class registered for the URL's scheme, if any.
  connection_type = None
  if getattr(http, 'connections', None):
    scheme = urlparse.urlsplit(http_request.url).scheme
    if scheme and scheme in http.connections:
      connection_type = http.connections[scheme]

  # Custom printing only at debuglevel 4
  if httplib2.debuglevel == 4:
    request_debuglevel = 4
  else:
    request_debuglevel = 0

  with _Httplib2Debuglevel(http_request, request_debuglevel, http=http):
    request_kwargs = dict(
        method=str(http_request.http_method),
        body=http_request.body,
        headers=http_request.headers,
        redirections=redirections,
        connection_type=connection_type)
    info, content = http.request(str(http_request.url), **request_kwargs)

  if info is None:
    raise exceptions.RequestError()

  result = Response(info, content, http_request.url)
  check_response_func(result)
  return result
| 334 |
| 335 |
def GetHttp():
  """Return a new httplib2.Http instance created with no arguments."""
  http = httplib2.Http()
  return http
| OLD | NEW |