Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(178)

Side by Side Diff: gslib/boto_translation.py

Issue 698893003: Update checked in version of gsutil to version 4.6 (Closed) Base URL: http://dart.googlecode.com/svn/third_party/gsutil/
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « gslib/boto_resumable_upload.py ('k') | gslib/bucket_listing_ref.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 # -*- coding: utf-8 -*-
2 # Copyright 2013 Google Inc. All Rights Reserved.
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 """XML/boto gsutil Cloud API implementation for GCS and Amazon S3."""
16
17 from __future__ import absolute_import
18
19 import base64
20 import binascii
21 import datetime
22 import errno
23 import httplib
24 import json
25 import multiprocessing
26 import os
27 import pickle
28 import random
29 import re
30 import socket
31 import tempfile
32 import textwrap
33 import time
34 import xml
35 from xml.dom.minidom import parseString as XmlParseString
36 from xml.sax import _exceptions as SaxExceptions
37
38 import boto
39 from boto import handler
40 from boto.exception import ResumableDownloadException as BotoResumableDownloadEx ception
41 from boto.exception import ResumableTransferDisposition
42 from boto.gs.cors import Cors
43 from boto.gs.lifecycle import LifecycleConfig
44 from boto.s3.deletemarker import DeleteMarker
45 from boto.s3.prefix import Prefix
46
47 from gslib.boto_resumable_upload import BotoResumableUpload
48 from gslib.cloud_api import AccessDeniedException
49 from gslib.cloud_api import ArgumentException
50 from gslib.cloud_api import BadRequestException
51 from gslib.cloud_api import CloudApi
52 from gslib.cloud_api import NotEmptyException
53 from gslib.cloud_api import NotFoundException
54 from gslib.cloud_api import PreconditionException
55 from gslib.cloud_api import ResumableDownloadException
56 from gslib.cloud_api import ResumableUploadAbortException
57 from gslib.cloud_api import ResumableUploadException
58 from gslib.cloud_api import ServiceException
59 from gslib.cloud_api_helper import ValidateDstObjectMetadata
60 from gslib.exception import CommandException
61 from gslib.exception import InvalidUrlError
62 from gslib.project_id import GOOG_PROJ_ID_HDR
63 from gslib.project_id import PopulateProjectId
64 from gslib.storage_url import StorageUrlFromString
65 from gslib.third_party.storage_apitools import storage_v1_messages as apitools_m essages
66 from gslib.translation_helper import AclTranslation
67 from gslib.translation_helper import AddS3MarkerAclToObjectMetadata
68 from gslib.translation_helper import CorsTranslation
69 from gslib.translation_helper import CreateBucketNotFoundException
70 from gslib.translation_helper import CreateObjectNotFoundException
71 from gslib.translation_helper import DEFAULT_CONTENT_TYPE
72 from gslib.translation_helper import EncodeStringAsLong
73 from gslib.translation_helper import GenerationFromUrlAndString
74 from gslib.translation_helper import HeadersFromObjectMetadata
75 from gslib.translation_helper import LifecycleTranslation
76 from gslib.translation_helper import REMOVE_CORS_CONFIG
77 from gslib.translation_helper import S3MarkerAclFromObjectMetadata
78 from gslib.util import ConfigureNoOpAuthIfNeeded
79 from gslib.util import DEFAULT_FILE_BUFFER_SIZE
80 from gslib.util import GetFileSize
81 from gslib.util import GetMaxRetryDelay
82 from gslib.util import GetNumRetries
83 from gslib.util import MultiprocessingIsAvailable
84 from gslib.util import S3_DELETE_MARKER_GUID
85 from gslib.util import TWO_MB
86 from gslib.util import UnaryDictToXml
87 from gslib.util import UTF8
88 from gslib.util import XML_PROGRESS_CALLBACKS
89
# Boto exception types that this module knows how to translate into
# gsutil Cloud API exceptions (see _TranslateBotoException usage below).
TRANSLATABLE_BOTO_EXCEPTIONS = (boto.exception.BotoServerError,
                                boto.exception.InvalidUriError,
                                boto.exception.ResumableDownloadException,
                                boto.exception.ResumableUploadException,
                                boto.exception.StorageCreateError,
                                boto.exception.StorageResponseError)

# If multiprocessing is available, this will be overridden to a (thread-safe)
# multiprocessing.Value in a call to InitializeMultiprocessingVariables.
boto_auth_initialized = False

# Matches service error text referring to a non-existent object; DOTALL
# because the message may span multiple lines.
NON_EXISTENT_OBJECT_REGEX = re.compile(r'.*non-\s*existent\s*object',
                                       flags=re.DOTALL)
# Determines whether an etag is a valid MD5.
MD5_REGEX = re.compile(r'^"*[a-fA-F0-9]{32}"*$')
105
106
def InitializeMultiprocessingVariables():
  """Set up module-level state shared across worker processes.

  Replaces the plain-bool module global boto_auth_initialized with a
  process-shared, thread-safe multiprocessing.Value (initialized to 0)
  so that worker processes can observe and update it. See
  gslib.command.InitializeMultiprocessingVariables for an explanation
  of why this is necessary.
  """
  global boto_auth_initialized  # pylint: disable=global-variable-undefined
  boto_auth_initialized = multiprocessing.Value('i', 0)
115
116
117 class BotoTranslation(CloudApi):
118 """Boto-based XML translation implementation of gsutil Cloud API.
119
120 This class takes gsutil Cloud API objects, translates them to XML service
121 calls, and translates the results back into gsutil Cloud API objects for
122 use by the caller.
123 """
124
  def __init__(self, bucket_storage_uri_class, logger, provider=None,
               credentials=None, debug=0):
    """Performs necessary setup for interacting with the cloud storage provider.

    Args:
      bucket_storage_uri_class: boto storage_uri class, used by APIs that
                                provide boto translation or mocking.
      logger: logging.logger for outputting log messages.
      provider: Provider prefix describing cloud storage provider to connect to.
                'gs' and 's3' are supported. Function implementations ignore
                the provider argument and use this one instead.
      credentials: Unused.
      debug: Debug level for the API implementation (0..3).
    """
    super(BotoTranslation, self).__init__(bucket_storage_uri_class, logger,
                                          provider=provider, debug=debug)
    _ = credentials
    global boto_auth_initialized  # pylint: disable=global-variable-undefined
    # boto_auth_initialized is a multiprocessing.Value when multiprocessing is
    # available (set by InitializeMultiprocessingVariables), otherwise a plain
    # bool. Either way, configure no-op auth at most once per process group.
    if MultiprocessingIsAvailable()[0] and not boto_auth_initialized.value:
      ConfigureNoOpAuthIfNeeded()
      boto_auth_initialized.value = 1
    elif not boto_auth_initialized:
      ConfigureNoOpAuthIfNeeded()
      boto_auth_initialized = True
    # XML API version to send with each request (x-goog-api-version header).
    self.api_version = boto.config.get_value(
        'GSUtil', 'default_api_version', '1')
151
152 def GetBucket(self, bucket_name, provider=None, fields=None):
153 """See CloudApi class for function doc strings."""
154 _ = provider
155 bucket_uri = self._StorageUriForBucket(bucket_name)
156 headers = {}
157 self._AddApiVersionToHeaders(headers)
158 try:
159 return self._BotoBucketToBucket(bucket_uri.get_bucket(validate=True,
160 headers=headers),
161 fields=fields)
162 except TRANSLATABLE_BOTO_EXCEPTIONS, e:
163 self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
164
165 def ListBuckets(self, project_id=None, provider=None, fields=None):
166 """See CloudApi class for function doc strings."""
167 _ = provider
168 get_fields = self._ListToGetFields(list_fields=fields)
169 headers = {}
170 self._AddApiVersionToHeaders(headers)
171 if self.provider == 'gs':
172 headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id)
173 try:
174 provider_uri = boto.storage_uri(
175 '%s://' % self.provider,
176 suppress_consec_slashes=False,
177 bucket_storage_uri_class=self.bucket_storage_uri_class,
178 debug=self.debug)
179
180 buckets_iter = provider_uri.get_all_buckets(headers=headers)
181 for bucket in buckets_iter:
182 if self.provider == 's3' and bucket.name.lower() != bucket.name:
183 # S3 listings can return buckets with upper-case names, but boto
184 # can't successfully call them.
185 continue
186 yield self._BotoBucketToBucket(bucket, fields=get_fields)
187 except TRANSLATABLE_BOTO_EXCEPTIONS, e:
188 self._TranslateExceptionAndRaise(e)
189
  def PatchBucket(self, bucket_name, metadata, preconditions=None,
                  provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    # Each populated metadata field below maps to a separate XML API call;
    # fields left unset are not modified.
    try:
      self._AddPreconditionsToHeaders(preconditions, headers)
      if metadata.acl:
        boto_acl = AclTranslation.BotoAclFromMessage(metadata.acl)
        bucket_uri.set_xml_acl(boto_acl.to_xml(), headers=headers)
      if metadata.cors:
        # The REMOVE_CORS_CONFIG marker means "clear the CORS configuration";
        # translate it to an empty config before conversion.
        if metadata.cors == REMOVE_CORS_CONFIG:
          metadata.cors = []
        boto_cors = CorsTranslation.BotoCorsFromMessage(metadata.cors)
        bucket_uri.set_cors(boto_cors, False)
      if metadata.defaultObjectAcl:
        boto_acl = AclTranslation.BotoAclFromMessage(
            metadata.defaultObjectAcl)
        bucket_uri.set_def_xml_acl(boto_acl.to_xml(), headers=headers)
      if metadata.lifecycle:
        boto_lifecycle = LifecycleTranslation.BotoLifecycleFromMessage(
            metadata.lifecycle)
        bucket_uri.configure_lifecycle(boto_lifecycle, False)
      if metadata.logging:
        if self.provider == 'gs':
          headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(None)
        if metadata.logging.logBucket and metadata.logging.logObjectPrefix:
          bucket_uri.enable_logging(metadata.logging.logBucket,
                                    metadata.logging.logObjectPrefix,
                                    False, headers)
        else:  # Logging field is present and empty.  Disable logging.
          bucket_uri.disable_logging(False, headers)
      if metadata.versioning:
        bucket_uri.configure_versioning(metadata.versioning.enabled,
                                        headers=headers)
      if metadata.website:
        main_page_suffix = metadata.website.mainPageSuffix
        error_page = metadata.website.notFoundPage
        bucket_uri.set_website_config(main_page_suffix, error_page)
      # Re-fetch the bucket so the returned metadata reflects the changes.
      return self.GetBucket(bucket_name, fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
234
  def CreateBucket(self, bucket_name, project_id=None, metadata=None,
                   provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    location = ''
    if metadata and metadata.location:
      location = metadata.location
    # Pass storage_class param only if this is a GCS bucket. (In S3 the
    # storage class is specified on the key object.)
    headers = {}
    if bucket_uri.scheme == 'gs':
      # GCS also needs the API version and project id headers.
      self._AddApiVersionToHeaders(headers)
      headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id)
      storage_class = ''
      if metadata and metadata.storageClass:
        storage_class = metadata.storageClass
      try:
        bucket_uri.create_bucket(headers=headers, location=location,
                                 storage_class=storage_class)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
    else:
      try:
        bucket_uri.create_bucket(headers=headers, location=location)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
    # Fetch and return the new bucket's metadata.
    return self.GetBucket(bucket_name, fields=fields)
263
  def DeleteBucket(self, bucket_name, preconditions=None, provider=None):
    """See CloudApi class for function doc strings."""
    _ = provider, preconditions
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      bucket_uri.delete_bucket(headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      translated_exception = self._TranslateBotoException(
          e, bucket_name=bucket_name)
      # Enhance the generic BucketNotEmpty error with versioning information,
      # since versioned buckets can appear empty yet still hold old versions.
      if (translated_exception and
          'BucketNotEmpty' in translated_exception.reason):
        # get_versioning_config() itself makes a service call and can raise,
        # hence the nested try/except below.
        try:
          if bucket_uri.get_versioning_config():
            if self.provider == 's3':
              raise NotEmptyException(
                  'VersionedBucketNotEmpty (%s). Currently, gsutil does not '
                  'support listing or removing S3 DeleteMarkers, so you may '
                  'need to delete these using another tool to successfully '
                  'delete this bucket.' % bucket_name, status=e.status)
            raise NotEmptyException(
                'VersionedBucketNotEmpty (%s)' % bucket_name, status=e.status)
          else:
            raise NotEmptyException('BucketNotEmpty (%s)' % bucket_name,
                                    status=e.status)
        except TRANSLATABLE_BOTO_EXCEPTIONS, e2:
          self._TranslateExceptionAndRaise(e2, bucket_name=bucket_name)
      elif translated_exception and translated_exception.status == 404:
        raise NotFoundException('Bucket %s does not exist.' % bucket_name)
      else:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
296
  def ListObjects(self, bucket_name, prefix=None, delimiter=None,
                  all_versions=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    get_fields = self._ListToGetFields(list_fields=fields)
    bucket_uri = self._StorageUriForBucket(bucket_name)
    prefix_list = []
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      objects_iter = bucket_uri.list_bucket(prefix=prefix or '',
                                            delimiter=delimiter or '',
                                            all_versions=all_versions,
                                            headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

    # Generator: yields CsObjectOrPrefix wrappers for both prefixes
    # (sub-directories) and object keys, in listing order.
    try:
      for key in objects_iter:
        if isinstance(key, Prefix):
          prefix_list.append(key.name)
          yield CloudApi.CsObjectOrPrefix(key.name,
                                          CloudApi.CsObjectOrPrefixType.PREFIX)
        else:
          key_to_convert = key

          # Listed keys are populated with these fields during bucket listing.
          key_http_fields = set(['bucket', 'etag', 'name', 'updated',
                                 'generation', 'metageneration', 'size'])

          # When fields == None, the caller is requesting all possible fields.
          # If the caller requested any fields that are not populated by bucket
          # listing, we'll need to make a separate HTTP call for each object to
          # get its metadata and populate the remaining fields with the result.
          if not get_fields or (get_fields and not
                                get_fields.issubset(key_http_fields)):

            generation = None
            if getattr(key, 'generation', None):
              generation = key.generation
            # S3 keys carry version_id rather than generation; if both are
            # present, version_id takes precedence.
            if getattr(key, 'version_id', None):
              generation = key.version_id
            key_to_convert = self._GetBotoKey(bucket_name, key.name,
                                              generation=generation)
          return_object = self._BotoKeyToObject(key_to_convert,
                                                fields=get_fields)

          yield CloudApi.CsObjectOrPrefix(return_object,
                                          CloudApi.CsObjectOrPrefixType.OBJECT)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
348
349 def GetObjectMetadata(self, bucket_name, object_name, generation=None,
350 provider=None, fields=None):
351 """See CloudApi class for function doc strings."""
352 _ = provider
353 try:
354 return self._BotoKeyToObject(self._GetBotoKey(bucket_name, object_name,
355 generation=generation),
356 fields=fields)
357 except TRANSLATABLE_BOTO_EXCEPTIONS, e:
358 self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
359 object_name=object_name,
360 generation=generation)
361
362 def _CurryDigester(self, digester_object):
363 """Curries a digester object into a form consumable by boto.
364
365 Key instantiates its own digesters by calling hash_algs[alg]() [note there
366 are no arguments to this function]. So in order to pass in our caught-up
367 digesters during a resumable download, we need to pass the digester
368 object but don't get to look it up based on the algorithm name. Here we
369 use a lambda to make lookup implicit.
370
371 Args:
372 digester_object: Input object to be returned by the created function.
373
374 Returns:
375 A function which when called will return the input object.
376 """
377 return lambda: digester_object
378
  def GetObjectMedia(
      self, bucket_name, object_name, download_stream, provider=None,
      generation=None, object_size=None,
      download_strategy=CloudApi.DownloadStrategy.ONE_SHOT,
      start_byte=0, end_byte=None, progress_callback=None,
      serialization_data=None, digesters=None):
    """See CloudApi class for function doc strings."""
    # This implementation will get the object metadata first if we don't pass it
    # in via serialization_data.
    headers = {}
    self._AddApiVersionToHeaders(headers)
    if 'accept-encoding' not in headers:
      headers['accept-encoding'] = 'gzip'
    # Build the Range header from start_byte/end_byte.
    if end_byte:
      headers['range'] = 'bytes=%s-%s' % (start_byte, end_byte)
    elif start_byte > 0:
      headers['range'] = 'bytes=%s-' % (start_byte)
    else:
      headers['range'] = 'bytes=%s' % (start_byte)

    # Since in most cases we already made a call to get the object metadata,
    # here we avoid an extra HTTP call by unpickling the key. This is coupled
    # with the implementation in _BotoKeyToObject.
    if serialization_data:
      serialization_dict = json.loads(serialization_data)
      key = pickle.loads(binascii.a2b_base64(serialization_dict['url']))
    else:
      key = self._GetBotoKey(bucket_name, object_name, generation=generation)

    # Only the GCS path injects caught-up digesters via hash_algs; for S3,
    # digests are recovered from key.local_hashes after the download (below).
    if digesters and self.provider == 'gs':
      hash_algs = {}
      for alg in digesters:
        hash_algs[alg] = self._CurryDigester(digesters[alg])
    else:
      hash_algs = {}

    total_size = object_size or 0
    if serialization_data:
      total_size = json.loads(serialization_data)['total_size']

    if download_strategy is CloudApi.DownloadStrategy.RESUMABLE:
      try:
        # Scale callback granularity with size, with a floor of
        # XML_PROGRESS_CALLBACKS callbacks.
        if total_size:
          num_progress_callbacks = max(int(total_size) / TWO_MB,
                                       XML_PROGRESS_CALLBACKS)
        else:
          num_progress_callbacks = XML_PROGRESS_CALLBACKS
        self._PerformResumableDownload(
            download_stream, key, headers=headers, callback=progress_callback,
            num_callbacks=num_progress_callbacks, hash_algs=hash_algs)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                         object_name=object_name,
                                         generation=generation)
    elif download_strategy is CloudApi.DownloadStrategy.ONE_SHOT:
      try:
        self._PerformSimpleDownload(download_stream, key, headers=headers,
                                    hash_algs=hash_algs)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                         object_name=object_name,
                                         generation=generation)
    else:
      raise ArgumentException('Unsupported DownloadStrategy: %s' %
                              download_strategy)

    if self.provider == 's3':
      if digesters:

        class HashToDigester(object):
          """Wrapper class to expose hash digests.

          boto creates its own digesters in s3's get_file, returning on-the-fly
          hashes only by way of key.local_hashes. To propagate the digest back
          to the caller, this stub class implements the digest() function.
          """

          def __init__(self, hash_val):
            self.hash_val = hash_val

          def digest(self):  # pylint: disable=invalid-name
            return self.hash_val

        for alg_name in digesters:
          if ((download_strategy == CloudApi.DownloadStrategy.RESUMABLE and
               start_byte != 0) or
              not ((getattr(key, 'local_hashes', None) and
                    alg_name in key.local_hashes))):
            # For resumable downloads, boto does not provide a mechanism to
            # catch up the hash in the case of a partially complete download.
            # In this case or in the case where no digest was successfully
            # calculated, set the digester to None, which indicates that we'll
            # need to manually calculate the hash from the local file once it
            # is complete.
            digesters[alg_name] = None
          else:
            # Use the on-the-fly hash.
            digesters[alg_name] = HashToDigester(key.local_hashes[alg_name])
477
478 def _PerformSimpleDownload(self, download_stream, key, headers=None,
479 hash_algs=None):
480 if not headers:
481 headers = {}
482 self._AddApiVersionToHeaders(headers)
483 try:
484 key.get_contents_to_file(download_stream, headers=headers,
485 hash_algs=hash_algs)
486 except TypeError: # s3 and mocks do not support hash_algs
487 key.get_contents_to_file(download_stream, headers=headers)
488
  def _PerformResumableDownload(self, fp, key, headers=None, callback=None,
                                num_callbacks=XML_PROGRESS_CALLBACKS,
                                hash_algs=None):
    """Downloads bytes from key to fp, resuming as needed.

    Args:
      fp: File pointer into which data should be downloaded
      key: Key object from which data is to be downloaded
      headers: Headers to send when retrieving the file
      callback: (optional) a callback function that will be called to report
           progress on the download.  The callback should accept two integer
           parameters.  The first integer represents the number of
           bytes that have been successfully transmitted from the service.  The
           second represents the total number of bytes that need to be
           transmitted.
      num_callbacks: (optional) If a callback is specified with the callback
           parameter, this determines the granularity of the callback
           by defining the maximum number of times the callback will be
           called during the file transfer.
      hash_algs: Dict of hash algorithms to apply to downloaded bytes.

    Raises:
      ResumableDownloadException on error.
    """
    if not headers:
      headers = {}
    self._AddApiVersionToHeaders(headers)

    retryable_exceptions = (httplib.HTTPException, IOError, socket.error,
                            socket.gaierror)

    debug = key.bucket.connection.debug

    num_retries = GetNumRetries()
    progress_less_iterations = 0

    while True:  # Retry as long as we're making progress.
      had_file_bytes_before_attempt = GetFileSize(fp)
      try:
        # Seek to EOF so each attempt resumes where the previous one stopped.
        cur_file_size = GetFileSize(fp, position_to_eof=True)

        def DownloadProxyCallback(total_bytes_downloaded, total_size):
          """Translates a boto callback into a gsutil Cloud API callback.

          Callbacks are originally made by boto.s3.Key.get_file(); here we take
          into account that we're resuming a download.

          Args:
            total_bytes_downloaded: Actual bytes downloaded so far, not
                including the point we resumed from.
            total_size: Total size of the download.
          """
          if callback:
            callback(cur_file_size + total_bytes_downloaded, total_size)

        # Work on a copy before adding this attempt's Range header, so the
        # caller's dict is not mutated with a Range.
        headers = headers.copy()
        headers['Range'] = 'bytes=%d-%d' % (cur_file_size, key.size - 1)
        cb = DownloadProxyCallback

        # Disable AWSAuthConnection-level retry behavior, since that would
        # cause downloads to restart from scratch.
        try:
          key.get_file(fp, headers, cb, num_callbacks, override_num_retries=0,
                       hash_algs=hash_algs)
        except TypeError:
          # s3 and mock keys do not support the hash_algs argument.
          key.get_file(fp, headers, cb, num_callbacks, override_num_retries=0)
        fp.flush()
        # Download succeeded.
        return
      except retryable_exceptions, e:
        if debug >= 1:
          self.logger.info('Caught exception (%s)', repr(e))
        if isinstance(e, IOError) and e.errno == errno.EPIPE:
          # Broken pipe error causes httplib to immediately
          # close the socket (http://bugs.python.org/issue5542),
          # so we need to close and reopen the key before resuming
          # the download.
          if self.provider == 's3':
            key.get_file(fp, headers, cb, num_callbacks, override_num_retries=0)
          else:  # self.provider == 'gs'
            key.get_file(fp, headers, cb, num_callbacks,
                         override_num_retries=0, hash_algs=hash_algs)
      except BotoResumableDownloadException, e:
        if (e.disposition ==
            ResumableTransferDisposition.ABORT_CUR_PROCESS):
          raise ResumableDownloadException(e.message)
        else:
          if debug >= 1:
            self.logger.info('Caught ResumableDownloadException (%s) - will '
                             'retry', e.message)

      # At this point we had a re-tryable failure; see if made progress.
      if GetFileSize(fp) > had_file_bytes_before_attempt:
        progress_less_iterations = 0
      else:
        progress_less_iterations += 1

      if progress_less_iterations > num_retries:
        # Don't retry any longer in the current process.
        raise ResumableDownloadException(
            'Too many resumable download attempts failed without '
            'progress. You might try this download again later')

      # Close the key, in case a previous download died partway
      # through and left data in the underlying key HTTP buffer.
      # Do this within a try/except block in case the connection is
      # closed (since key.close() attempts to do a final read, in which
      # case this read attempt would get an IncompleteRead exception,
      # which we can safely ignore).
      try:
        key.close()
      except httplib.IncompleteRead:
        pass

      # Exponential backoff with jitter, capped at GetMaxRetryDelay().
      sleep_time_secs = min(random.random() * (2 ** progress_less_iterations),
                            GetMaxRetryDelay())
      if debug >= 1:
        self.logger.info(
            'Got retryable failure (%d progress-less in a row).\nSleeping %d '
            'seconds before re-trying', progress_less_iterations,
            sleep_time_secs)
      time.sleep(sleep_time_secs)
611
  def PatchObjectMetadata(self, bucket_name, object_name, metadata,
                          generation=None, preconditions=None, provider=None,
                          fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    object_uri = self._StorageUriForObject(bucket_name, object_name,
                                           generation=generation)

    headers = {}
    self._AddApiVersionToHeaders(headers)
    meta_headers = HeadersFromObjectMetadata(metadata, self.provider)

    # Split translated headers into additions/replacements (metadata_plus)
    # and removals (metadata_minus, indicated by a None value).
    metadata_plus = {}
    metadata_minus = set()
    metadata_changed = False
    for k, v in meta_headers.iteritems():
      metadata_changed = True
      if v is None:
        metadata_minus.add(k)
      else:
        metadata_plus[k] = v

    self._AddPreconditionsToHeaders(preconditions, headers)

    if metadata_changed:
      try:
        object_uri.set_metadata(metadata_plus, metadata_minus, False,
                                headers=headers)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                         object_name=object_name,
                                         generation=generation)

    # ACL changes require a separate XML API call.
    if metadata.acl:
      boto_acl = AclTranslation.BotoAclFromMessage(metadata.acl)
      try:
        object_uri.set_xml_acl(boto_acl.to_xml(), key_name=object_name)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                         object_name=object_name,
                                         generation=generation)
    # Re-fetch so the returned metadata reflects the patch.
    return self.GetObjectMetadata(bucket_name, object_name,
                                  generation=generation, fields=fields)
655
656 def _PerformSimpleUpload(self, dst_uri, upload_stream, md5=None,
657 canned_acl=None, progress_callback=None,
658 headers=None):
659 dst_uri.set_contents_from_file(upload_stream, md5=md5, policy=canned_acl,
660 cb=progress_callback, headers=headers)
661
662 def _PerformStreamingUpload(self, dst_uri, upload_stream, canned_acl=None,
663 progress_callback=None, headers=None):
664 if dst_uri.get_provider().supports_chunked_transfer():
665 dst_uri.set_contents_from_stream(upload_stream, policy=canned_acl,
666 cb=progress_callback, headers=headers)
667 else:
668 # Provider doesn't support chunked transfer, so copy to a temporary
669 # file.
670 (temp_fh, temp_path) = tempfile.mkstemp()
671 try:
672 with open(temp_path, 'wb') as out_fp:
673 stream_bytes = upload_stream.read(DEFAULT_FILE_BUFFER_SIZE)
674 while stream_bytes:
675 out_fp.write(stream_bytes)
676 stream_bytes = upload_stream.read(DEFAULT_FILE_BUFFER_SIZE)
677 with open(temp_path, 'rb') as in_fp:
678 dst_uri.set_contents_from_file(in_fp, policy=canned_acl,
679 headers=headers)
680 finally:
681 os.close(temp_fh)
682 os.unlink(temp_path)
683
684 def _PerformResumableUpload(self, key, upload_stream, upload_size,
685 tracker_callback, canned_acl=None,
686 serialization_data=None, progress_callback=None,
687 headers=None):
688 resumable_upload = BotoResumableUpload(
689 tracker_callback, self.logger, resume_url=serialization_data)
690 resumable_upload.SendFile(key, upload_stream, upload_size,
691 canned_acl=canned_acl, cb=progress_callback,
692 headers=headers)
693
  def _UploadSetup(self, object_metadata, preconditions=None):
    """Shared upload implementation.

    Args:
      object_metadata: Object metadata describing destination object.
      preconditions: Optional gsutil Cloud API preconditions.

    Returns:
      Headers dictionary, StorageUri for upload (based on inputs)
    """
    ValidateDstObjectMetadata(object_metadata)

    headers = HeadersFromObjectMetadata(object_metadata, self.provider)
    self._AddApiVersionToHeaders(headers)

    # Append any supplied digests to the x-goog-hash header (comma-separated
    # if both are present), stripping trailing newlines from the base64 text.
    if object_metadata.crc32c:
      if 'x-goog-hash' in headers:
        headers['x-goog-hash'] += (
            ',crc32c=%s' % object_metadata.crc32c.rstrip('\n'))
      else:
        headers['x-goog-hash'] = (
            'crc32c=%s' % object_metadata.crc32c.rstrip('\n'))
    if object_metadata.md5Hash:
      if 'x-goog-hash' in headers:
        headers['x-goog-hash'] += (
            ',md5=%s' % object_metadata.md5Hash.rstrip('\n'))
      else:
        headers['x-goog-hash'] = (
            'md5=%s' % object_metadata.md5Hash.rstrip('\n'))

    # A present-but-empty content-type is replaced with the generic binary
    # default.
    if 'content-type' in headers and not headers['content-type']:
      headers['content-type'] = 'application/octet-stream'

    self._AddPreconditionsToHeaders(preconditions, headers)

    dst_uri = self._StorageUriForObject(object_metadata.bucket,
                                        object_metadata.name)
    return headers, dst_uri
732
  def _HandleSuccessfulUpload(self, dst_uri, object_metadata, fields=None):
    """Set ACLs on an uploaded object and return its metadata.

    Args:
      dst_uri: Generation-specific StorageUri describing the object.
      object_metadata: Metadata for the object, including an ACL if applicable.
      fields: If present, return only these Object metadata fields.

    Returns:
      gsutil Cloud API Object metadata.

    Raises:
      CommandException if the object was overwritten / deleted concurrently.
    """
    try:
      # The XML API does not support if-generation-match for GET requests.
      # Therefore, if the object gets overwritten before the ACL and get_key
      # operations, the best we can do is warn that it happened.
      self._SetObjectAcl(object_metadata, dst_uri)
      return self._BotoKeyToObject(dst_uri.get_key(), fields=fields)
    except boto.exception.InvalidUriError as e:
      # Match boto's exact error text to distinguish "the key vanished"
      # from other InvalidUriError causes, which are re-raised unchanged.
      check_for_str = 'Attempt to get key for "%s" failed.' % dst_uri.uri
      if check_for_str in e.message:
        raise CommandException('\n'.join(textwrap.wrap(
            'Uploaded object (%s) was deleted or overwritten immediately '
            'after it was uploaded. This can happen if you attempt to upload '
            'to the same object multiple times concurrently.' % dst_uri.uri)))
      else:
        raise
762
763 def _SetObjectAcl(self, object_metadata, dst_uri):
764 """Sets the ACL (if present in object_metadata) on an uploaded object."""
765 if object_metadata.acl:
766 boto_acl = AclTranslation.BotoAclFromMessage(object_metadata.acl)
767 dst_uri.set_xml_acl(boto_acl.to_xml())
768 elif self.provider == 's3':
769 s3_acl = S3MarkerAclFromObjectMetadata(object_metadata)
770 if s3_acl:
771 dst_uri.set_xml_acl(s3_acl)
772
  def UploadObjectResumable(
      self, upload_stream, object_metadata, canned_acl=None, preconditions=None,
      provider=None, fields=None, size=None, serialization_data=None,
      tracker_callback=None, progress_callback=None):
    """See CloudApi class for function doc strings."""
    if self.provider == 's3':
      # Resumable uploads are not supported for s3.
      return self.UploadObject(
          upload_stream, object_metadata, canned_acl=canned_acl,
          preconditions=preconditions, fields=fields, size=size)
    headers, dst_uri = self._UploadSetup(object_metadata,
                                         preconditions=preconditions)
    # A tracker callback is mandatory so upload state can be persisted for
    # later resumption.
    if not tracker_callback:
      raise ArgumentException('No tracker callback function set for '
                              'resumable upload of %s' % dst_uri)
    try:
      self._PerformResumableUpload(dst_uri.new_key(headers=headers),
                                   upload_stream, size, tracker_callback,
                                   canned_acl=canned_acl,
                                   serialization_data=serialization_data,
                                   progress_callback=progress_callback,
                                   headers=headers)
      return self._HandleSuccessfulUpload(dst_uri, object_metadata,
                                          fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
                                       object_name=object_metadata.name)
800
  def UploadObjectStreaming(self, upload_stream, object_metadata,
                            canned_acl=None, progress_callback=None,
                            preconditions=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    headers, dst_uri = self._UploadSetup(object_metadata,
                                         preconditions=preconditions)

    try:
      # No size or md5 is supplied here; the streaming path consumes
      # upload_stream directly (see _PerformStreamingUpload).
      self._PerformStreamingUpload(
          dst_uri, upload_stream, canned_acl=canned_acl,
          progress_callback=progress_callback, headers=headers)
      # Re-reads the uploaded object's metadata (and applies any ACL); may
      # raise CommandException if the object was concurrently overwritten.
      return self._HandleSuccessfulUpload(dst_uri, object_metadata,
                                          fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
                                       object_name=object_metadata.name)
817
818 def UploadObject(self, upload_stream, object_metadata, canned_acl=None,
819 preconditions=None, size=None, progress_callback=None,
820 provider=None, fields=None):
821 """See CloudApi class for function doc strings."""
822 headers, dst_uri = self._UploadSetup(object_metadata,
823 preconditions=preconditions)
824
825 try:
826 md5 = None
827 if object_metadata.md5Hash:
828 md5 = []
829 # boto expects hex at index 0, base64 at index 1
830 md5.append(binascii.hexlify(
831 base64.decodestring(object_metadata.md5Hash.strip('\n"\''))))
832 md5.append(object_metadata.md5Hash.strip('\n"\''))
833 self._PerformSimpleUpload(dst_uri, upload_stream, md5=md5,
834 canned_acl=canned_acl,
835 progress_callback=progress_callback,
836 headers=headers)
837 return self._HandleSuccessfulUpload(dst_uri, object_metadata,
838 fields=fields)
839 except TRANSLATABLE_BOTO_EXCEPTIONS, e:
840 self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
841 object_name=object_metadata.name)
842
843 def DeleteObject(self, bucket_name, object_name, preconditions=None,
844 generation=None, provider=None):
845 """See CloudApi class for function doc strings."""
846 _ = provider
847 headers = {}
848 self._AddApiVersionToHeaders(headers)
849 self._AddPreconditionsToHeaders(preconditions, headers)
850
851 uri = self._StorageUriForObject(bucket_name, object_name,
852 generation=generation)
853 try:
854 uri.delete_key(validate=False, headers=headers)
855 except TRANSLATABLE_BOTO_EXCEPTIONS, e:
856 self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
857 object_name=object_name,
858 generation=generation)
859
860 def CopyObject(self, src_bucket_name, src_obj_name, dst_obj_metadata,
861 src_generation=None, canned_acl=None, preconditions=None,
862 provider=None, fields=None):
863 """See CloudApi class for function doc strings."""
864 _ = provider
865 dst_uri = self._StorageUriForObject(dst_obj_metadata.bucket,
866 dst_obj_metadata.name)
867
868 # Usually it's okay to treat version_id and generation as
869 # the same, but in this case the underlying boto call determines the
870 # provider based on the presence of one or the other.
871 src_version_id = None
872 if self.provider == 's3':
873 src_version_id = src_generation
874 src_generation = None
875
876 headers = HeadersFromObjectMetadata(dst_obj_metadata, self.provider)
877 self._AddApiVersionToHeaders(headers)
878 self._AddPreconditionsToHeaders(preconditions, headers)
879
880 if canned_acl:
881 headers[dst_uri.get_provider().acl_header] = canned_acl
882
883 preserve_acl = True if dst_obj_metadata.acl else False
884 if self.provider == 's3':
885 s3_acl = S3MarkerAclFromObjectMetadata(dst_obj_metadata)
886 if s3_acl:
887 preserve_acl = True
888
889 try:
890 new_key = dst_uri.copy_key(
891 src_bucket_name, src_obj_name, preserve_acl=preserve_acl,
892 headers=headers, src_version_id=src_version_id,
893 src_generation=src_generation)
894
895 return self._BotoKeyToObject(new_key, fields=fields)
896 except TRANSLATABLE_BOTO_EXCEPTIONS, e:
897 self._TranslateExceptionAndRaise(e, dst_obj_metadata.bucket,
898 dst_obj_metadata.name)
899
900 def ComposeObject(self, src_objs_metadata, dst_obj_metadata,
901 preconditions=None, provider=None, fields=None):
902 """See CloudApi class for function doc strings."""
903 _ = provider
904 ValidateDstObjectMetadata(dst_obj_metadata)
905
906 dst_obj_name = dst_obj_metadata.name
907 dst_obj_metadata.name = None
908 dst_bucket_name = dst_obj_metadata.bucket
909 dst_obj_metadata.bucket = None
910 headers = HeadersFromObjectMetadata(dst_obj_metadata, self.provider)
911 if not dst_obj_metadata.contentType:
912 dst_obj_metadata.contentType = DEFAULT_CONTENT_TYPE
913 headers['content-type'] = dst_obj_metadata.contentType
914 self._AddApiVersionToHeaders(headers)
915 self._AddPreconditionsToHeaders(preconditions, headers)
916
917 dst_uri = self._StorageUriForObject(dst_bucket_name, dst_obj_name)
918
919 src_components = []
920 for src_obj in src_objs_metadata:
921 src_uri = self._StorageUriForObject(dst_bucket_name, src_obj.name,
922 generation=src_obj.generation)
923 src_components.append(src_uri)
924
925 try:
926 dst_uri.compose(src_components, headers=headers)
927
928 return self.GetObjectMetadata(dst_bucket_name, dst_obj_name,
929 fields=fields)
930 except TRANSLATABLE_BOTO_EXCEPTIONS, e:
931 self._TranslateExceptionAndRaise(e, dst_obj_metadata.bucket,
932 dst_obj_metadata.name)
933
934 def _AddPreconditionsToHeaders(self, preconditions, headers):
935 """Adds preconditions (if any) to headers."""
936 if preconditions and self.provider == 'gs':
937 if preconditions.gen_match:
938 headers['x-goog-if-generation-match'] = preconditions.gen_match
939 if preconditions.meta_gen_match:
940 headers['x-goog-if-metageneration-match'] = preconditions.meta_gen_match
941
942 def _AddApiVersionToHeaders(self, headers):
943 if self.provider == 'gs':
944 headers['x-goog-api-version'] = self.api_version
945
946 def _GetMD5FromETag(self, src_etag):
947 """Returns an MD5 from the etag iff the etag is a valid MD5 hash.
948
949 Args:
950 src_etag: Object etag for which to return the MD5.
951
952 Returns:
953 MD5 in hex string format, or None.
954 """
955 if src_etag and MD5_REGEX.search(src_etag):
956 return src_etag.strip('"\'').lower()
957
958 def _StorageUriForBucket(self, bucket):
959 """Returns a boto storage_uri for the given bucket name.
960
961 Args:
962 bucket: Bucket name (string).
963
964 Returns:
965 Boto storage_uri for the bucket.
966 """
967 return boto.storage_uri(
968 '%s://%s' % (self.provider, bucket),
969 suppress_consec_slashes=False,
970 bucket_storage_uri_class=self.bucket_storage_uri_class,
971 debug=self.debug)
972
973 def _StorageUriForObject(self, bucket, object_name, generation=None):
974 """Returns a boto storage_uri for the given object.
975
976 Args:
977 bucket: Bucket name (string).
978 object_name: Object name (string).
979 generation: Generation or version_id of object. If None, live version
980 of the object is used.
981
982 Returns:
983 Boto storage_uri for the object.
984 """
985 uri_string = '%s://%s/%s' % (self.provider, bucket, object_name)
986 if generation:
987 uri_string += '#%s' % generation
988 return boto.storage_uri(
989 uri_string, suppress_consec_slashes=False,
990 bucket_storage_uri_class=self.bucket_storage_uri_class,
991 debug=self.debug)
992
993 def _GetBotoKey(self, bucket_name, object_name, generation=None):
994 """Gets the boto key for an object.
995
996 Args:
997 bucket_name: Bucket containing the object.
998 object_name: Object name.
999 generation: Generation or version of the object to retrieve.
1000
1001 Returns:
1002 Boto key for the object.
1003 """
1004 object_uri = self._StorageUriForObject(bucket_name, object_name,
1005 generation=generation)
1006 try:
1007 key = object_uri.get_key()
1008 if not key:
1009 raise CreateObjectNotFoundException('404', self.provider,
1010 bucket_name, object_name,
1011 generation=generation)
1012 return key
1013 except TRANSLATABLE_BOTO_EXCEPTIONS, e:
1014 self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
1015 object_name=object_name,
1016 generation=generation)
1017
1018 def _ListToGetFields(self, list_fields=None):
1019 """Removes 'items/' from the input fields and converts it to a set.
1020
1021 This way field sets requested for ListBucket/ListObject can be used in
1022 _BotoBucketToBucket and _BotoKeyToObject calls.
1023
1024 Args:
1025 list_fields: Iterable fields usable in ListBucket/ListObject calls.
1026
1027 Returns:
1028 Set of fields usable in GetBucket/GetObject or
1029 _BotoBucketToBucket/_BotoKeyToObject calls.
1030 """
1031 if list_fields:
1032 get_fields = set()
1033 for field in list_fields:
1034 if field in ['kind', 'nextPageToken', 'prefixes']:
1035 # These are not actually object / bucket metadata fields.
1036 # They are fields specific to listing, so we don't consider them.
1037 continue
1038 get_fields.add(re.sub(r'items/', '', field))
1039 return get_fields
1040
  # pylint: disable=too-many-statements
  def _BotoBucketToBucket(self, bucket, fields=None):
    """Constructs an apitools Bucket from a boto bucket.

    Args:
      bucket: Boto bucket.
      fields: If present, construct the apitools Bucket with only this set of
          metadata fields.

    Returns:
      apitools Bucket.
    """
    bucket_uri = self._StorageUriForBucket(bucket.name)

    cloud_api_bucket = apitools_messages.Bucket(name=bucket.name,
                                                id=bucket.name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    if self.provider == 'gs':
      # Each facet below is fetched with its own XML API request, so only
      # populate the ones the caller asked for (or all, when fields is None).
      if not fields or 'storageClass' in fields:
        if hasattr(bucket, 'get_storage_class'):
          cloud_api_bucket.storageClass = bucket.get_storage_class()
      if not fields or 'acl' in fields:
        # NOTE(review): bucket.get_acl() is evaluated in the for-expression,
        # outside the try below, so errors from the ACL fetch itself are not
        # translated here -- confirm whether that is intended.
        for acl in AclTranslation.BotoBucketAclToMessage(
            bucket.get_acl(headers=headers)):
          try:
            cloud_api_bucket.acl.append(acl)
          except TRANSLATABLE_BOTO_EXCEPTIONS, e:
            translated_exception = self._TranslateBotoException(
                e, bucket_name=bucket.name)
            if (translated_exception and
                isinstance(translated_exception,
                           AccessDeniedException)):
              # JSON API doesn't differentiate between a blank ACL list
              # and an access denied, so this is intentionally left blank.
              pass
            else:
              self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'cors' in fields:
        try:
          boto_cors = bucket_uri.get_cors()
          cloud_api_bucket.cors = CorsTranslation.BotoCorsToMessage(boto_cors)
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'defaultObjectAcl' in fields:
        # Same shape (and same caveat) as the 'acl' translation above.
        for acl in AclTranslation.BotoObjectAclToMessage(
            bucket.get_def_acl(headers=headers)):
          try:
            cloud_api_bucket.defaultObjectAcl.append(acl)
          except TRANSLATABLE_BOTO_EXCEPTIONS, e:
            translated_exception = self._TranslateBotoException(
                e, bucket_name=bucket.name)
            if (translated_exception and
                isinstance(translated_exception,
                           AccessDeniedException)):
              # JSON API doesn't differentiate between a blank ACL list
              # and an access denied, so this is intentionally left blank.
              pass
            else:
              self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'lifecycle' in fields:
        try:
          boto_lifecycle = bucket_uri.get_lifecycle_config()
          cloud_api_bucket.lifecycle = (
              LifecycleTranslation.BotoLifecycleToMessage(boto_lifecycle))
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'logging' in fields:
        try:
          boto_logging = bucket_uri.get_logging_config()
          if boto_logging and 'Logging' in boto_logging:
            logging_config = boto_logging['Logging']
            cloud_api_bucket.logging = apitools_messages.Bucket.LoggingValue()
            if 'LogObjectPrefix' in logging_config:
              cloud_api_bucket.logging.logObjectPrefix = (
                  logging_config['LogObjectPrefix'])
            if 'LogBucket' in logging_config:
              cloud_api_bucket.logging.logBucket = logging_config['LogBucket']
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'website' in fields:
        try:
          boto_website = bucket_uri.get_website_config()
          if boto_website and 'WebsiteConfiguration' in boto_website:
            website_config = boto_website['WebsiteConfiguration']
            cloud_api_bucket.website = apitools_messages.Bucket.WebsiteValue()
            if 'MainPageSuffix' in website_config:
              cloud_api_bucket.website.mainPageSuffix = (
                  website_config['MainPageSuffix'])
            if 'NotFoundPage' in website_config:
              cloud_api_bucket.website.notFoundPage = (
                  website_config['NotFoundPage'])
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
    if not fields or 'versioning' in fields:
      versioning = bucket_uri.get_versioning_config(headers=headers)
      if versioning:
        if (self.provider == 's3' and 'Versioning' in versioning and
            versioning['Versioning'] == 'Enabled'):
          cloud_api_bucket.versioning = (
              apitools_messages.Bucket.VersioningValue(enabled=True))
        elif self.provider == 'gs':
          # For gs, any non-empty versioning config is treated as enabled --
          # presumably the XML API returns one only when versioning is on;
          # confirm against the API.
          cloud_api_bucket.versioning = (
              apitools_messages.Bucket.VersioningValue(enabled=True))

    # For S3 long bucket listing we do not support CORS, lifecycle, website,
    # and logging translation. The individual commands can be used to get
    # the XML equivalents for S3.
    return cloud_api_bucket
1150
  def _BotoKeyToObject(self, key, fields=None):
    """Constructs an apitools Object from a boto key.

    Args:
      key: Boto key to construct Object from.
      fields: If present, construct the apitools Object with only this set of
          metadata fields.

    Returns:
      apitools Object corresponding to key.
    """
    custom_metadata = None
    if not fields or 'metadata' in fields:
      custom_metadata = self._TranslateBotoKeyCustomMetadata(key)
    cache_control = None
    if not fields or 'cacheControl' in fields:
      cache_control = getattr(key, 'cache_control', None)
    component_count = None
    if not fields or 'componentCount' in fields:
      component_count = getattr(key, 'component_count', None)
    content_disposition = None
    if not fields or 'contentDisposition' in fields:
      content_disposition = getattr(key, 'content_disposition', None)
    # Other fields like updated and ACL depend on the generation
    # of the object, so populate that regardless of whether it was requested.
    generation = self._TranslateBotoKeyGeneration(key)
    metageneration = None
    if not fields or 'metageneration' in fields:
      metageneration = self._TranslateBotoKeyMetageneration(key)
    updated = None
    # Translation code to avoid a dependency on dateutil.
    if not fields or 'updated' in fields:
      updated = self._TranslateBotoKeyTimestamp(key)
    etag = None
    if not fields or 'etag' in fields:
      etag = getattr(key, 'etag', None)
      if etag:
        etag = etag.strip('"\'')
    crc32c = None
    if not fields or 'crc32c' in fields:
      # cloud_hashes values are raw digest bytes; re-encode as base64 to
      # match the JSON API representation.
      if hasattr(key, 'cloud_hashes') and 'crc32c' in key.cloud_hashes:
        crc32c = base64.encodestring(key.cloud_hashes['crc32c']).rstrip('\n')
    md5_hash = None
    if not fields or 'md5Hash' in fields:
      if hasattr(key, 'cloud_hashes') and 'md5' in key.cloud_hashes:
        md5_hash = base64.encodestring(key.cloud_hashes['md5']).rstrip('\n')
      elif self._GetMD5FromETag(getattr(key, 'etag', None)):
        # Fall back to the etag when it is a plain MD5 hex digest.
        md5_hash = base64.encodestring(
            binascii.unhexlify(self._GetMD5FromETag(key.etag))).rstrip('\n')
      elif self.provider == 's3':
        # S3 etags are MD5s for non-multi-part objects, but multi-part objects
        # (which include all objects >= 5GB) have a custom checksum
        # implementation that is not currently supported by gsutil.
        self.logger.warn(
            'Non-MD5 etag (%s) present for key %s, data integrity checks are '
            'not possible.', key.etag, key)

    # Serialize the boto key in the media link if it is requested. This
    # way we can later access the key without adding an HTTP call.
    media_link = None
    if not fields or 'mediaLink' in fields:
      media_link = binascii.b2a_base64(
          pickle.dumps(key, pickle.HIGHEST_PROTOCOL))
    size = None
    if not fields or 'size' in fields:
      size = key.size or 0

    cloud_api_object = apitools_messages.Object(
        bucket=key.bucket.name,
        name=key.name,
        size=size,
        contentEncoding=key.content_encoding,
        contentLanguage=key.content_language,
        contentType=key.content_type,
        cacheControl=cache_control,
        contentDisposition=content_disposition,
        etag=etag,
        crc32c=crc32c,
        md5Hash=md5_hash,
        generation=generation,
        metageneration=metageneration,
        componentCount=component_count,
        updated=updated,
        metadata=custom_metadata,
        mediaLink=media_link)

    # Remaining functions amend cloud_api_object.
    self._TranslateDeleteMarker(key, cloud_api_object)
    if not fields or 'acl' in fields:
      generation_str = GenerationFromUrlAndString(
          StorageUrlFromString(self.provider), generation)
      self._TranslateBotoKeyAcl(key, cloud_api_object,
                                generation=generation_str)

    return cloud_api_object
1246
1247 def _TranslateBotoKeyCustomMetadata(self, key):
1248 """Populates an apitools message from custom metadata in the boto key."""
1249 custom_metadata = None
1250 if getattr(key, 'metadata', None):
1251 custom_metadata = apitools_messages.Object.MetadataValue(
1252 additionalProperties=[])
1253 for k, v in key.metadata.iteritems():
1254 if k.lower() == 'content-language':
1255 # Work around content-language being inserted into custom metadata.
1256 continue
1257 custom_metadata.additionalProperties.append(
1258 apitools_messages.Object.MetadataValue.AdditionalProperty(
1259 key=k, value=v))
1260 return custom_metadata
1261
1262 def _TranslateBotoKeyGeneration(self, key):
1263 """Returns the generation/version_id number from the boto key if present."""
1264 generation = None
1265 if self.provider == 'gs':
1266 if getattr(key, 'generation', None):
1267 generation = long(key.generation)
1268 elif self.provider == 's3':
1269 if getattr(key, 'version_id', None):
1270 generation = EncodeStringAsLong(key.version_id)
1271 return generation
1272
1273 def _TranslateBotoKeyMetageneration(self, key):
1274 """Returns the metageneration number from the boto key if present."""
1275 metageneration = None
1276 if self.provider == 'gs':
1277 if getattr(key, 'metageneration', None):
1278 metageneration = long(key.metageneration)
1279 return metageneration
1280
1281 def _TranslateBotoKeyTimestamp(self, key):
1282 """Parses the timestamp from the boto key into an datetime object.
1283
1284 This avoids a dependency on dateutil.
1285
1286 Args:
1287 key: Boto key to get timestamp from.
1288
1289 Returns:
1290 datetime object if string is parsed successfully, None otherwise.
1291 """
1292 if key.last_modified:
1293 if '.' in key.last_modified:
1294 key_us_timestamp = key.last_modified.rstrip('Z') + '000Z'
1295 else:
1296 key_us_timestamp = key.last_modified.rstrip('Z') + '.000000Z'
1297 fmt = '%Y-%m-%dT%H:%M:%S.%fZ'
1298 try:
1299 return datetime.datetime.strptime(key_us_timestamp, fmt)
1300 except ValueError:
1301 try:
1302 # Try alternate format
1303 fmt = '%a, %d %b %Y %H:%M:%S %Z'
1304 return datetime.datetime.strptime(key.last_modified, fmt)
1305 except ValueError:
1306 # Could not parse the time; leave updated as None.
1307 return None
1308
1309 def _TranslateDeleteMarker(self, key, cloud_api_object):
1310 """Marks deleted objects with a metadata value (for S3 compatibility)."""
1311 if isinstance(key, DeleteMarker):
1312 if not cloud_api_object.metadata:
1313 cloud_api_object.metadata = apitools_messages.Object.MetadataValue()
1314 cloud_api_object.metadata.additionalProperties = []
1315 cloud_api_object.metadata.additionalProperties.append(
1316 apitools_messages.Object.MetadataValue.AdditionalProperty(
1317 key=S3_DELETE_MARKER_GUID, value=True))
1318
  def _TranslateBotoKeyAcl(self, key, cloud_api_object, generation=None):
    """Updates cloud_api_object with the ACL from the boto key."""
    storage_uri_for_key = self._StorageUriForObject(key.bucket.name, key.name,
                                                    generation=generation)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      if self.provider == 'gs':
        key_acl = storage_uri_for_key.get_acl(headers=headers)
        # key.get_acl() does not support versioning so we need to use
        # storage_uri to ensure we're getting the versioned ACL.
        for acl in AclTranslation.BotoObjectAclToMessage(key_acl):
          cloud_api_object.acl.append(acl)
      if self.provider == 's3':
        key_acl = key.get_xml_acl(headers=headers)
        # ACLs for s3 are different and we use special markers to represent
        # them in the gsutil Cloud API.
        AddS3MarkerAclToObjectMetadata(cloud_api_object, key_acl)
    except boto.exception.GSResponseError, e:
      if e.status == 403:
        # Consume access denied exceptions to mimic JSON behavior of simply
        # returning None if sufficient permission is not present. The caller
        # needs to handle the case where the ACL is not populated.
        pass
      else:
        raise
1345
1346 def _TranslateExceptionAndRaise(self, e, bucket_name=None, object_name=None,
1347 generation=None):
1348 """Translates a Boto exception and raises the translated or original value.
1349
1350 Args:
1351 e: Any Exception.
1352 bucket_name: Optional bucket name in request that caused the exception.
1353 object_name: Optional object name in request that caused the exception.
1354 generation: Optional generation in request that caused the exception.
1355
1356 Raises:
1357 Translated CloudApi exception, or the original exception if it was not
1358 translatable.
1359 """
1360 translated_exception = self._TranslateBotoException(
1361 e, bucket_name=bucket_name, object_name=object_name,
1362 generation=generation)
1363 if translated_exception:
1364 raise translated_exception
1365 else:
1366 raise
1367
  def _TranslateBotoException(self, e, bucket_name=None, object_name=None,
                              generation=None):
    """Translates boto exceptions into their gsutil Cloud API equivalents.

    Args:
      e: Any exception in TRANSLATABLE_BOTO_EXCEPTIONS.
      bucket_name: Optional bucket name in request that caused the exception.
      object_name: Optional object name in request that caused the exception.
      generation: Optional generation in request that caused the exception.

    Returns:
      CloudStorageApiServiceException for translatable exceptions, None
      otherwise.

    Because we're using isinstance, check for subtypes first.
    """
    if isinstance(e, boto.exception.StorageResponseError):
      if e.status == 400:
        return BadRequestException(e.code, status=e.status, body=e.body)
      elif e.status == 401 or e.status == 403:
        return AccessDeniedException(e.code, status=e.status, body=e.body)
      elif e.status == 404:
        # Build the most specific not-found exception available from the
        # request context.
        if bucket_name:
          if object_name:
            return CreateObjectNotFoundException(e.status, self.provider,
                                                 bucket_name, object_name,
                                                 generation=generation)
          return CreateBucketNotFoundException(e.status, self.provider,
                                               bucket_name)
        return NotFoundException(e.code, status=e.status, body=e.body)
      elif e.status == 409 and e.code and 'BucketNotEmpty' in e.code:
        return NotEmptyException('BucketNotEmpty (%s)' % bucket_name,
                                 status=e.status, body=e.body)
      elif e.status == 412:
        return PreconditionException(e.code, status=e.status, body=e.body)
    if isinstance(e, boto.exception.StorageCreateError):
      return ServiceException('Bucket already exists.', status=e.status,
                              body=e.body)

    if isinstance(e, boto.exception.BotoServerError):
      return ServiceException(e.message, status=e.status, body=e.body)

    if isinstance(e, boto.exception.InvalidUriError):
      # Work around textwrap when searching for this string.
      if e.message and NON_EXISTENT_OBJECT_REGEX.match(e.message.encode(UTF8)):
        return NotFoundException(e.message, status=404)
      return InvalidUrlError(e.message)

    if isinstance(e, boto.exception.ResumableUploadException):
      # ABORT and START_OVER dispositions mean the upload cannot simply be
      # resumed, so map them to the abort exception.
      if (e.disposition == boto.exception.ResumableTransferDisposition.ABORT or
          (e.disposition ==
           boto.exception.ResumableTransferDisposition.START_OVER)):
        return ResumableUploadAbortException(e.message)
      else:
        return ResumableUploadException(e.message)

    if isinstance(e, boto.exception.ResumableDownloadException):
      return ResumableDownloadException(e.message)

    # Not translatable; the caller re-raises the original exception.
    return None
1428
1429 # For function docstrings, see CloudApiDelegator class.
1430 def XmlPassThroughGetAcl(self, storage_url, def_obj_acl=False):
1431 """See CloudApiDelegator class for function doc strings."""
1432 try:
1433 uri = boto.storage_uri(
1434 storage_url.url_string, suppress_consec_slashes=False,
1435 bucket_storage_uri_class=self.bucket_storage_uri_class,
1436 debug=self.debug)
1437 if def_obj_acl:
1438 return uri.get_def_acl()
1439 else:
1440 return uri.get_acl()
1441 except TRANSLATABLE_BOTO_EXCEPTIONS, e:
1442 self._TranslateExceptionAndRaise(e)
1443
1444 def XmlPassThroughSetAcl(self, acl_text, storage_url, canned=True,
1445 def_obj_acl=False):
1446 """See CloudApiDelegator class for function doc strings."""
1447 try:
1448 uri = boto.storage_uri(
1449 storage_url.url_string, suppress_consec_slashes=False,
1450 bucket_storage_uri_class=self.bucket_storage_uri_class,
1451 debug=self.debug)
1452 if canned:
1453 if def_obj_acl:
1454 canned_acls = uri.canned_acls()
1455 if acl_text not in canned_acls:
1456 raise CommandException('Invalid canned ACL "%s".' % acl_text)
1457 uri.set_def_acl(acl_text, uri.object_name)
1458 else:
1459 canned_acls = uri.canned_acls()
1460 if acl_text not in canned_acls:
1461 raise CommandException('Invalid canned ACL "%s".' % acl_text)
1462 uri.set_acl(acl_text, uri.object_name)
1463 else:
1464 if def_obj_acl:
1465 uri.set_def_xml_acl(acl_text, uri.object_name)
1466 else:
1467 uri.set_xml_acl(acl_text, uri.object_name)
1468 except TRANSLATABLE_BOTO_EXCEPTIONS, e:
1469 self._TranslateExceptionAndRaise(e)
1470
1471 def XmlPassThroughSetCors(self, cors_text, storage_url):
1472 """See CloudApiDelegator class for function doc strings."""
1473 # Parse XML document and convert into Cors object.
1474 cors_obj = Cors()
1475 h = handler.XmlHandler(cors_obj, None)
1476 try:
1477 xml.sax.parseString(cors_text, h)
1478 except SaxExceptions.SAXParseException, e:
1479 raise CommandException('Requested CORS is invalid: %s at line %s, '
1480 'column %s' % (e.getMessage(), e.getLineNumber(),
1481 e.getColumnNumber()))
1482
1483 try:
1484 uri = boto.storage_uri(
1485 storage_url.url_string, suppress_consec_slashes=False,
1486 bucket_storage_uri_class=self.bucket_storage_uri_class,
1487 debug=self.debug)
1488 uri.set_cors(cors_obj, False)
1489 except TRANSLATABLE_BOTO_EXCEPTIONS, e:
1490 self._TranslateExceptionAndRaise(e)
1491
1492 def XmlPassThroughGetCors(self, storage_url):
1493 """See CloudApiDelegator class for function doc strings."""
1494 uri = boto.storage_uri(
1495 storage_url.url_string, suppress_consec_slashes=False,
1496 bucket_storage_uri_class=self.bucket_storage_uri_class,
1497 debug=self.debug)
1498 try:
1499 cors = uri.get_cors(False)
1500 except TRANSLATABLE_BOTO_EXCEPTIONS, e:
1501 self._TranslateExceptionAndRaise(e)
1502
1503 parsed_xml = xml.dom.minidom.parseString(cors.to_xml().encode(UTF8))
1504 # Pretty-print the XML to make it more easily human editable.
1505 return parsed_xml.toprettyxml(indent=' ')
1506
1507 def XmlPassThroughGetLifecycle(self, storage_url):
1508 """See CloudApiDelegator class for function doc strings."""
1509 try:
1510 uri = boto.storage_uri(
1511 storage_url.url_string, suppress_consec_slashes=False,
1512 bucket_storage_uri_class=self.bucket_storage_uri_class,
1513 debug=self.debug)
1514 lifecycle = uri.get_lifecycle_config(False)
1515 except TRANSLATABLE_BOTO_EXCEPTIONS, e:
1516 self._TranslateExceptionAndRaise(e)
1517
1518 parsed_xml = xml.dom.minidom.parseString(lifecycle.to_xml().encode(UTF8))
1519 # Pretty-print the XML to make it more easily human editable.
1520 return parsed_xml.toprettyxml(indent=' ')
1521
1522 def XmlPassThroughSetLifecycle(self, lifecycle_text, storage_url):
1523 """See CloudApiDelegator class for function doc strings."""
1524 # Parse XML document and convert into lifecycle object.
1525 lifecycle_obj = LifecycleConfig()
1526 h = handler.XmlHandler(lifecycle_obj, None)
1527 try:
1528 xml.sax.parseString(lifecycle_text, h)
1529 except SaxExceptions.SAXParseException, e:
1530 raise CommandException(
1531 'Requested lifecycle config is invalid: %s at line %s, column %s' %
1532 (e.getMessage(), e.getLineNumber(), e.getColumnNumber()))
1533
1534 try:
1535 uri = boto.storage_uri(
1536 storage_url.url_string, suppress_consec_slashes=False,
1537 bucket_storage_uri_class=self.bucket_storage_uri_class,
1538 debug=self.debug)
1539 uri.configure_lifecycle(lifecycle_obj, False)
1540 except TRANSLATABLE_BOTO_EXCEPTIONS, e:
1541 self._TranslateExceptionAndRaise(e)
1542
1543 def XmlPassThroughGetLogging(self, storage_url):
1544 """See CloudApiDelegator class for function doc strings."""
1545 try:
1546 uri = boto.storage_uri(
1547 storage_url.url_string, suppress_consec_slashes=False,
1548 bucket_storage_uri_class=self.bucket_storage_uri_class,
1549 debug=self.debug)
1550 logging_config_xml = UnaryDictToXml(uri.get_logging_config())
1551 except TRANSLATABLE_BOTO_EXCEPTIONS, e:
1552 self._TranslateExceptionAndRaise(e)
1553
1554 return XmlParseString(logging_config_xml).toprettyxml()
1555
1556 def XmlPassThroughGetWebsite(self, storage_url):
1557 """See CloudApiDelegator class for function doc strings."""
1558 try:
1559 uri = boto.storage_uri(
1560 storage_url.url_string, suppress_consec_slashes=False,
1561 bucket_storage_uri_class=self.bucket_storage_uri_class,
1562 debug=self.debug)
1563 web_config_xml = UnaryDictToXml(uri.get_website_config())
1564 except TRANSLATABLE_BOTO_EXCEPTIONS, e:
1565 self._TranslateExceptionAndRaise(e)
1566
1567 return XmlParseString(web_config_xml).toprettyxml()
OLDNEW
« no previous file with comments | « gslib/boto_resumable_upload.py ('k') | gslib/bucket_listing_ref.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698