OLD | NEW |
| (Empty) |
1 # -*- coding: utf-8 -*- | |
2 # Copyright 2014 Google Inc. All Rights Reserved. | |
3 # | |
4 # Licensed under the Apache License, Version 2.0 (the "License"); | |
5 # you may not use this file except in compliance with the License. | |
6 # You may obtain a copy of the License at | |
7 # | |
8 # http://www.apache.org/licenses/LICENSE-2.0 | |
9 # | |
10 # Unless required by applicable law or agreed to in writing, software | |
11 # distributed under the License is distributed on an "AS IS" BASIS, | |
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 # See the License for the specific language governing permissions and | |
14 # limitations under the License. | |
15 """Media helper functions and classes for Google Cloud Storage JSON API.""" | |
16 | |
17 from __future__ import absolute_import | |
18 | |
19 import copy | |
20 import cStringIO | |
21 import httplib | |
22 import logging | |
23 import socket | |
24 import types | |
25 import urlparse | |
26 | |
27 from apitools.base.py import exceptions as apitools_exceptions | |
28 import httplib2 | |
29 from httplib2 import parse_uri | |
30 | |
31 from gslib.cloud_api import BadRequestException | |
32 from gslib.progress_callback import ProgressCallbackWithBackoff | |
33 from gslib.util import SSL_TIMEOUT | |
34 from gslib.util import TRANSFER_BUFFER_SIZE | |
35 | |
36 | |
class BytesTransferredContainer(object):
  """Mutable holder for a byte count shared across transfer layers.

  When a transfer is resumed (or its connection is rebuilt mid-flight), the
  connection class must be constructed before the server has been queried for
  how many bytes were already accepted. Handing this container to
  Upload/DownloadCallbackConnection lets the byte count be filled in later,
  after construction, while the connection keeps a live reference to it.
  """

  def __init__(self):
    # Backing count; read and written only through the property below.
    self._num_bytes = 0

  @property
  def bytes_transferred(self):
    """Number of bytes transferred so far."""
    return self._num_bytes

  @bytes_transferred.setter
  def bytes_transferred(self, value):
    self._num_bytes = value
59 | |
class UploadCallbackConnectionClassFactory(object):
  """Creates a class that can override an httplib2 connection.

  This is used to provide progress callbacks and disable dumping the upload
  payload during debug statements. It can later be used to provide on-the-fly
  hash digestion during upload.
  """

  def __init__(self, bytes_uploaded_container,
               buffer_size=TRANSFER_BUFFER_SIZE,
               total_size=0, progress_callback=None):
    """Stores parameters to be baked into the generated connection class.

    Args:
      bytes_uploaded_container: BytesTransferredContainer shared with the
          caller; read once to seed progress for resumed uploads.
      buffer_size: Maximum number of bytes written per send() call.
      total_size: Total upload size, passed to the progress callback.
      progress_callback: Optional callable invoked with progress updates.
    """
    self.bytes_uploaded_container = bytes_uploaded_container
    self.buffer_size = buffer_size
    self.total_size = total_size
    self.progress_callback = progress_callback

  def GetConnectionClass(self):
    """Returns a connection class that overrides send."""
    # Capture factory state in locals so the class body below (which executes
    # now, in this scope) can copy them into class attributes.
    outer_bytes_uploaded_container = self.bytes_uploaded_container
    outer_buffer_size = self.buffer_size
    outer_total_size = self.total_size
    outer_progress_callback = self.progress_callback

    class UploadCallbackConnection(httplib2.HTTPSConnectionWithTimeout):
      """Connection class override for uploads."""
      bytes_uploaded_container = outer_bytes_uploaded_container
      # After we instantiate this class, apitools will check with the server
      # to find out how many bytes remain for a resumable upload.  This allows
      # us to update our progress once based on that number.
      processed_initial_bytes = False
      GCS_JSON_BUFFER_SIZE = outer_buffer_size
      callback_processor = None
      size = outer_total_size

      def __init__(self, *args, **kwargs):
        # Force our SSL timeout regardless of what the caller passed.
        kwargs['timeout'] = SSL_TIMEOUT
        httplib2.HTTPSConnectionWithTimeout.__init__(self, *args, **kwargs)

      def send(self, data):
        """Overrides HTTPConnection.send."""
        if not self.processed_initial_bytes:
          # First send on this connection: report bytes already accepted by
          # the server (nonzero for resumed uploads) exactly once.
          self.processed_initial_bytes = True
          if outer_progress_callback:
            self.callback_processor = ProgressCallbackWithBackoff(
                outer_total_size, outer_progress_callback)
            self.callback_processor.Progress(
                self.bytes_uploaded_container.bytes_transferred)
        # httplib.HTTPConnection.send accepts either a string or a file-like
        # object (anything that implements read()).
        if isinstance(data, basestring):
          full_buffer = cStringIO.StringIO(data)
        else:
          full_buffer = data
        # Send in GCS_JSON_BUFFER_SIZE chunks so progress callbacks fire at a
        # useful granularity instead of once per (possibly huge) payload.
        partial_buffer = full_buffer.read(self.GCS_JSON_BUFFER_SIZE)
        while partial_buffer:
          httplib2.HTTPSConnectionWithTimeout.send(self, partial_buffer)
          send_length = len(partial_buffer)
          if self.callback_processor:
            # This is the only place where gsutil has control over making a
            # callback, but here we can't differentiate the metadata bytes
            # (such as headers and OAuth2 refreshes) sent during an upload
            # from the actual upload bytes, so we will actually report
            # slightly more bytes than desired to the callback handler.
            #
            # One considered/rejected alternative is to move the callbacks
            # into the HashingFileUploadWrapper which only processes reads on
            # the bytes. This has the disadvantages of being removed from
            # where we actually send the bytes and unnecessarily
            # multi-purposing that class.
            self.callback_processor.Progress(send_length)
          partial_buffer = full_buffer.read(self.GCS_JSON_BUFFER_SIZE)

    return UploadCallbackConnection
133 | |
134 | |
def WrapUploadHttpRequest(upload_http):
  """Replaces upload_http.request with a connection_type-filtering closure.

  The wrapped request forwards the caller's custom connection_type only for
  PUT and POST requests; every other method is issued with
  connection_type=None so the default connection class is used.

  NOTE(review): the original docstring claimed the custom connection type is
  used "only on PUTs" (with POSTs being OAuth token refreshes whose payload
  should not be processed), but the code also forwards it for POSTs — confirm
  which is intended.

  Args:
    upload_http: httplib2.Http instance to wrap
  """
  original_request = upload_http.request

  def _FilteredRequest(uri, method='GET', body=None, headers=None,
                       redirections=httplib2.DEFAULT_MAX_REDIRECTS,
                       connection_type=None):
    # Only mutating methods keep the caller-supplied connection class.
    chosen_type = connection_type if method in ('PUT', 'POST') else None
    return original_request(uri, method=method, body=body, headers=headers,
                            redirections=redirections,
                            connection_type=chosen_type)

  # Replace the request method with our own closure.
  upload_http.request = _FilteredRequest
157 | |
158 | |
class DownloadCallbackConnectionClassFactory(object):
  """Creates a class that can override an httplib2 connection.

  This is used to provide progress callbacks, disable dumping the download
  payload during debug statements, and provide on-the-fly hash digestion during
  download. On-the-fly digestion is particularly important because httplib2
  will decompress gzipped content on-the-fly, thus this class provides our
  only opportunity to calculate the correct hash for an object that has a
  gzip hash in the cloud.
  """

  def __init__(self, bytes_downloaded_container,
               buffer_size=TRANSFER_BUFFER_SIZE, total_size=0,
               progress_callback=None, digesters=None):
    """Stores parameters to be baked into the generated connection class.

    Args:
      bytes_downloaded_container: BytesTransferredContainer shared with the
          caller; read once to seed progress for resumed downloads.
      buffer_size: Buffer size hint (stored; not referenced in this class).
      total_size: Total download size, passed to the progress callback.
      progress_callback: Optional callable invoked with progress updates.
      digesters: Optional dict of hash algorithm name -> digester; each
          digester's update() is fed every chunk read.
    """
    self.buffer_size = buffer_size
    self.total_size = total_size
    self.progress_callback = progress_callback
    self.digesters = digesters
    self.bytes_downloaded_container = bytes_downloaded_container

  def GetConnectionClass(self):
    """Returns a connection class that overrides getresponse."""

    class DownloadCallbackConnection(httplib2.HTTPSConnectionWithTimeout):
      """Connection class override for downloads."""
      # The class body executes in the enclosing method's scope, so `self`
      # here is the factory instance; its state is copied into class
      # attributes at class-creation time.
      outer_total_size = self.total_size
      outer_digesters = self.digesters
      outer_progress_callback = self.progress_callback
      outer_bytes_downloaded_container = self.bytes_downloaded_container
      processed_initial_bytes = False
      callback_processor = None

      def __init__(self, *args, **kwargs):
        # Force our SSL timeout regardless of what the caller passed.
        kwargs['timeout'] = SSL_TIMEOUT
        httplib2.HTTPSConnectionWithTimeout.__init__(self, *args, **kwargs)

      def getresponse(self, buffering=False):
        """Wraps an HTTPResponse to perform callbacks and hashing.

        In this function, self is a DownloadCallbackConnection.

        Args:
          buffering: Unused. This function uses a local buffer.

        Returns:
          HTTPResponse object with wrapped read function.
        """
        orig_response = httplib.HTTPConnection.getresponse(self)
        # Only wrap payload-bearing success responses; errors and redirects
        # pass through untouched.
        if orig_response.status not in (httplib.OK, httplib.PARTIAL_CONTENT):
          return orig_response
        orig_read_func = orig_response.read

        def read(amt=None):  # pylint: disable=invalid-name
          """Overrides HTTPConnection.getresponse.read.

          This function only supports reads of TRANSFER_BUFFER_SIZE or smaller.

          Args:
            amt: Integer n where 0 < n <= TRANSFER_BUFFER_SIZE. This is a
                 keyword argument to match the read function it overrides,
                 but it is required.

          Returns:
            Data read from HTTPConnection.
          """
          if not amt or amt > TRANSFER_BUFFER_SIZE:
            raise BadRequestException(
                'Invalid HTTP read size %s during download, expected %s.' %
                (amt, TRANSFER_BUFFER_SIZE))
          else:
            # NOTE(review): dead assignment — amt is always truthy here
            # because the falsy case raised above.
            amt = amt or TRANSFER_BUFFER_SIZE

          if not self.processed_initial_bytes:
            # First read on this connection: report bytes already downloaded
            # (nonzero for resumed downloads) exactly once.
            self.processed_initial_bytes = True
            if self.outer_progress_callback:
              self.callback_processor = ProgressCallbackWithBackoff(
                  self.outer_total_size, self.outer_progress_callback)
              self.callback_processor.Progress(
                  self.outer_bytes_downloaded_container.bytes_transferred)

          data = orig_read_func(amt)
          read_length = len(data)
          if self.callback_processor:
            self.callback_processor.Progress(read_length)
          if self.outer_digesters:
            # Feed every chunk to each registered hash digester.
            for alg in self.outer_digesters:
              self.outer_digesters[alg].update(data)
          return data
        orig_response.read = read

        return orig_response
    return DownloadCallbackConnection
251 | |
252 | |
def WrapDownloadHttpRequest(download_http):
  """Overrides download request functions for an httplib2.Http object.

  Args:
    download_http: httplib2.Http.object to wrap / override.

  Returns:
    Wrapped / overridden httplib2.Http object.
  """

  # httplib2 has a bug https://code.google.com/p/httplib2/issues/detail?id=305
  # where custom connection_type is not respected after redirects. This
  # function is copied from httplib2 and overrides the request function so that
  # the connection_type is properly passed through.
  # pylint: disable=protected-access,g-inconsistent-quotes,unused-variable
  # pylint: disable=g-equals-none,g-doc-return-or-yield
  # pylint: disable=g-short-docstring-punctuation,g-doc-args
  # pylint: disable=too-many-statements
  def OverrideRequest(self, conn, host, absolute_uri, request_uri, method,
                      body, headers, redirections, cachekey):
    """Do the actual request using the connection object.

    Also follow one level of redirects if necessary.

    Copied nearly verbatim from httplib2.Http._request; the substantive
    change is passing connection_type=conn.__class__ on the redirect
    re-request below, so redirects keep our custom connection class.
    """

    auths = ([(auth.depth(request_uri), auth) for auth in self.authorizations
              if auth.inscope(host, request_uri)])
    auth = auths and sorted(auths)[0][1] or None
    if auth:
      auth.request(method, request_uri, headers, body)

    (response, content) = self._conn_request(conn, request_uri, method, body,
                                             headers)

    if auth:
      # Retry once with refreshed credentials if the auth handler asks.
      if auth.response(response, body):
        auth.request(method, request_uri, headers, body)
        (response, content) = self._conn_request(conn, request_uri, method,
                                                 body, headers)
        response._stale_digest = 1

    if response.status == 401:
      # Try each authorization scheme offered in the 401 challenge.
      for authorization in self._auth_from_challenge(
          host, request_uri, headers, response, content):
        authorization.request(method, request_uri, headers, body)
        (response, content) = self._conn_request(conn, request_uri, method,
                                                 body, headers)
        if response.status != 401:
          self.authorizations.append(authorization)
          authorization.response(response, body)
          break

    if (self.follow_all_redirects or (method in ["GET", "HEAD"])
        or response.status == 303):
      if self.follow_redirects and response.status in [300, 301, 302,
                                                       303, 307]:
        # Pick out the location header and basically start from the beginning
        # remembering first to strip the ETag header and decrement our 'depth'
        if redirections:
          if not response.has_key('location') and response.status != 300:
            raise httplib2.RedirectMissingLocation(
                "Redirected but the response is missing a Location: header.",
                response, content)
          # Fix-up relative redirects (which violate an RFC 2616 MUST)
          if response.has_key('location'):
            location = response['location']
            (scheme, authority, path, query, fragment) = parse_uri(location)
            if authority == None:
              response['location'] = urlparse.urljoin(absolute_uri, location)
          if response.status == 301 and method in ["GET", "HEAD"]:
            response['-x-permanent-redirect-url'] = response['location']
            if not response.has_key('content-location'):
              response['content-location'] = absolute_uri
            httplib2._updateCache(headers, response, content, self.cache,
                                  cachekey)
          # Strip conditional headers so the redirected request is
          # unconditional.
          if headers.has_key('if-none-match'):
            del headers['if-none-match']
          if headers.has_key('if-modified-since'):
            del headers['if-modified-since']
          if ('authorization' in headers and
              not self.forward_authorization_headers):
            del headers['authorization']
          if response.has_key('location'):
            location = response['location']
            old_response = copy.deepcopy(response)
            if not old_response.has_key('content-location'):
              old_response['content-location'] = absolute_uri
            redirect_method = method
            if response.status in [302, 303]:
              redirect_method = "GET"
              body = None
            # Key change vs stock httplib2: propagate our custom connection
            # class to the redirected request.
            (response, content) = self.request(
                location, redirect_method, body=body, headers=headers,
                redirections=redirections-1,
                connection_type=conn.__class__)
            response.previous = old_response
        else:
          raise httplib2.RedirectLimit(
              "Redirected more times than redirection_limit allows.",
              response, content)
      elif response.status in [200, 203] and method in ["GET", "HEAD"]:
        # Don't cache 206's since we aren't going to handle byte range
        # requests
        if not response.has_key('content-location'):
          response['content-location'] = absolute_uri
        httplib2._updateCache(headers, response, content, self.cache,
                              cachekey)

    return (response, content)

  # Wrap download_http so we do not use our custom connection_type
  # on POSTS, which are used to refresh oauth tokens. We don't want to
  # process the data received in those requests.
  request_orig = download_http.request
  def NewRequest(uri, method='GET', body=None, headers=None,
                 redirections=httplib2.DEFAULT_MAX_REDIRECTS,
                 connection_type=None):
    if method == 'POST':
      return request_orig(uri, method=method, body=body,
                          headers=headers, redirections=redirections,
                          connection_type=None)
    else:
      return request_orig(uri, method=method, body=body,
                          headers=headers, redirections=redirections,
                          connection_type=connection_type)

  # Replace the request methods with our own closures.
  download_http._request = types.MethodType(OverrideRequest, download_http)
  download_http.request = NewRequest

  return download_http
384 | |
385 | |
class HttpWithNoRetries(httplib2.Http):
  """httplib2.Http variant that does not retry.

  httplib2 automatically retries requests according to httplib2.RETRIES, but
  in certain cases httplib2 ignores the RETRIES value and forces a retry.
  Because httplib2 does not handle the case where the underlying request body
  is a stream, a retry may cause a non-idempotent write as the stream is
  partially consumed and not reset before the retry occurs.

  Here we override _conn_request to disable retries unequivocally, so that
  uploads may be retried at higher layers that properly handle stream request
  bodies.
  """

  # Copied from httplib2.Http._conn_request with the retry loop removed;
  # exceptions propagate to the caller instead of triggering a retry.
  def _conn_request(self, conn, request_uri, method, body, headers):  # pylint: disable=too-many-statements

    try:
      if hasattr(conn, 'sock') and conn.sock is None:
        conn.connect()
      conn.request(method, request_uri, body, headers)
    except socket.timeout:
      raise
    except socket.gaierror:
      conn.close()
      raise httplib2.ServerNotFoundError(
          'Unable to find the server at %s' % conn.host)
    except httplib2.ssl_SSLError:
      conn.close()
      raise
    except socket.error, e:
      err = 0
      if hasattr(e, 'args'):
        err = getattr(e, 'args')[0]
      else:
        err = e.errno
      if err == httplib2.errno.ECONNREFUSED:  # Connection refused
        raise
      # NOTE(review): other socket errors fall through (as in httplib2) and
      # we still attempt getresponse() below — confirm this is intended.
    except httplib.HTTPException:
      conn.close()
      raise
    try:
      response = conn.getresponse()
    except (socket.error, httplib.HTTPException):
      conn.close()
      raise
    else:
      content = ''
      if method == 'HEAD':
        # HEAD responses carry no body; just close the connection.
        conn.close()
      else:
        content = response.read()
      response = httplib2.Response(response)
      if method != 'HEAD':
        # pylint: disable=protected-access
        content = httplib2._decompressContent(response, content)
    return (response, content)
442 | |
443 | |
class HttpWithDownloadStream(httplib2.Http):
  """httplib2.Http variant that only pushes bytes through a stream.

  httplib2 handles media by storing entire chunks of responses in memory, which
  is undesirable particularly when multiple instances are used during
  multi-threaded/multi-process copy. This class copies and then overrides some
  httplib2 functions to use a streaming copy approach that uses small memory
  buffers.

  Also disables httplib2 retries (for reasons stated in the HttpWithNoRetries
  class doc).
  """

  def __init__(self, stream=None, *args, **kwds):
    """Initializes with a required destination stream.

    Args:
      stream: Writable file-like object that response payload bytes are
          written to. Required despite the keyword-style default.

    Raises:
      apitools_exceptions.InvalidUserInputError: if stream is None.
    """
    if stream is None:
      raise apitools_exceptions.InvalidUserInputError(
          'Cannot create HttpWithDownloadStream with no stream')
    self._stream = stream
    self._logger = logging.getLogger()
    super(HttpWithDownloadStream, self).__init__(*args, **kwds)

  @property
  def stream(self):
    # The destination stream supplied at construction time (read-only).
    return self._stream

  # Copied from httplib2.Http._conn_request (retries removed, as in
  # HttpWithNoRetries) with the payload-reading path changed to stream
  # TRANSFER_BUFFER_SIZE chunks into self.stream instead of buffering the
  # whole body in memory.
  def _conn_request(self, conn, request_uri, method, body, headers):  # pylint: disable=too-many-statements
    try:
      if hasattr(conn, 'sock') and conn.sock is None:
        conn.connect()
      conn.request(method, request_uri, body, headers)
    except socket.timeout:
      raise
    except socket.gaierror:
      conn.close()
      raise httplib2.ServerNotFoundError(
          'Unable to find the server at %s' % conn.host)
    except httplib2.ssl_SSLError:
      conn.close()
      raise
    except socket.error, e:
      err = 0
      if hasattr(e, 'args'):
        err = getattr(e, 'args')[0]
      else:
        err = e.errno
      if err == httplib2.errno.ECONNREFUSED:  # Connection refused
        raise
    except httplib.HTTPException:
      # Just because the server closed the connection doesn't apparently mean
      # that the server didn't send a response.
      conn.close()
      raise
    try:
      response = conn.getresponse()
    except (socket.error, httplib.HTTPException):
      conn.close()
      raise
    else:
      content = ''
      if method == 'HEAD':
        conn.close()
        response = httplib2.Response(response)
      else:
        if response.status in (httplib.OK, httplib.PARTIAL_CONTENT):
          content_length = None
          if hasattr(response, 'msg'):
            content_length = response.getheader('content-length')
          http_stream = response
          bytes_read = 0
          # Stream the payload to self.stream in fixed-size chunks rather
          # than holding the whole body in memory.
          while True:
            new_data = http_stream.read(TRANSFER_BUFFER_SIZE)
            if new_data:
              self.stream.write(new_data)
              bytes_read += len(new_data)
            else:
              break

          if (content_length is not None and
              long(bytes_read) != long(content_length)):
            # The input stream terminated before we were able to read the
            # entire contents, possibly due to a network condition. Set
            # content-length to indicate how many bytes we actually read.
            self._logger.log(
                logging.DEBUG, 'Only got %s bytes out of content-length %s '
                'for request URI %s. Resetting content-length to match '
                'bytes read.', bytes_read, content_length, request_uri)
            response.msg['content-length'] = str(bytes_read)
          response = httplib2.Response(response)
        else:
          # We fall back to the current httplib2 behavior if we're
          # not processing bytes (eg it's a redirect).
          content = response.read()
          response = httplib2.Response(response)
          # pylint: disable=protected-access
          content = httplib2._decompressContent(response, content)
    return (response, content)
OLD | NEW |