OLD | NEW |
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 # Copyright 2013 Google Inc. All Rights Reserved. | 2 # Copyright 2013 Google Inc. All Rights Reserved. |
3 # | 3 # |
4 # Licensed under the Apache License, Version 2.0 (the "License"); | 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
5 # you may not use this file except in compliance with the License. | 5 # you may not use this file except in compliance with the License. |
6 # You may obtain a copy of the License at | 6 # You may obtain a copy of the License at |
7 # | 7 # |
8 # http://www.apache.org/licenses/LICENSE-2.0 | 8 # http://www.apache.org/licenses/LICENSE-2.0 |
9 # | 9 # |
10 # Unless required by applicable law or agreed to in writing, software | 10 # Unless required by applicable law or agreed to in writing, software |
(...skipping 12 matching lines...) Expand all Loading... |
23 import httplib | 23 import httplib |
24 import json | 24 import json |
25 import multiprocessing | 25 import multiprocessing |
26 import os | 26 import os |
27 import pickle | 27 import pickle |
28 import random | 28 import random |
29 import re | 29 import re |
30 import socket | 30 import socket |
31 import tempfile | 31 import tempfile |
32 import textwrap | 32 import textwrap |
| 33 import threading |
33 import time | 34 import time |
34 import xml | 35 import xml |
35 from xml.dom.minidom import parseString as XmlParseString | 36 from xml.dom.minidom import parseString as XmlParseString |
36 from xml.sax import _exceptions as SaxExceptions | 37 from xml.sax import _exceptions as SaxExceptions |
37 | 38 |
38 import boto | 39 import boto |
39 from boto import handler | 40 from boto import handler |
40 from boto.exception import ResumableDownloadException as BotoResumableDownloadEx
ception | 41 from boto.exception import ResumableDownloadException as BotoResumableDownloadEx
ception |
41 from boto.exception import ResumableTransferDisposition | 42 from boto.exception import ResumableTransferDisposition |
42 from boto.gs.cors import Cors | 43 from boto.gs.cors import Cors |
(...skipping 24 matching lines...) Expand all Loading... |
67 from gslib.hashing_helper import Base64EncodeHash | 68 from gslib.hashing_helper import Base64EncodeHash |
68 from gslib.hashing_helper import Base64ToHexHash | 69 from gslib.hashing_helper import Base64ToHexHash |
69 from gslib.project_id import GOOG_PROJ_ID_HDR | 70 from gslib.project_id import GOOG_PROJ_ID_HDR |
70 from gslib.project_id import PopulateProjectId | 71 from gslib.project_id import PopulateProjectId |
71 from gslib.storage_url import StorageUrlFromString | 72 from gslib.storage_url import StorageUrlFromString |
72 from gslib.third_party.storage_apitools import storage_v1_messages as apitools_m
essages | 73 from gslib.third_party.storage_apitools import storage_v1_messages as apitools_m
essages |
73 from gslib.translation_helper import AclTranslation | 74 from gslib.translation_helper import AclTranslation |
74 from gslib.translation_helper import AddS3MarkerAclToObjectMetadata | 75 from gslib.translation_helper import AddS3MarkerAclToObjectMetadata |
75 from gslib.translation_helper import CorsTranslation | 76 from gslib.translation_helper import CorsTranslation |
76 from gslib.translation_helper import CreateBucketNotFoundException | 77 from gslib.translation_helper import CreateBucketNotFoundException |
| 78 from gslib.translation_helper import CreateNotFoundExceptionForObjectWrite |
77 from gslib.translation_helper import CreateObjectNotFoundException | 79 from gslib.translation_helper import CreateObjectNotFoundException |
78 from gslib.translation_helper import DEFAULT_CONTENT_TYPE | 80 from gslib.translation_helper import DEFAULT_CONTENT_TYPE |
79 from gslib.translation_helper import EncodeStringAsLong | 81 from gslib.translation_helper import EncodeStringAsLong |
80 from gslib.translation_helper import GenerationFromUrlAndString | 82 from gslib.translation_helper import GenerationFromUrlAndString |
81 from gslib.translation_helper import HeadersFromObjectMetadata | 83 from gslib.translation_helper import HeadersFromObjectMetadata |
82 from gslib.translation_helper import LifecycleTranslation | 84 from gslib.translation_helper import LifecycleTranslation |
83 from gslib.translation_helper import REMOVE_CORS_CONFIG | 85 from gslib.translation_helper import REMOVE_CORS_CONFIG |
84 from gslib.translation_helper import S3MarkerAclFromObjectMetadata | 86 from gslib.translation_helper import S3MarkerAclFromObjectMetadata |
85 from gslib.util import ConfigureNoOpAuthIfNeeded | 87 from gslib.util import ConfigureNoOpAuthIfNeeded |
86 from gslib.util import DEFAULT_FILE_BUFFER_SIZE | 88 from gslib.util import DEFAULT_FILE_BUFFER_SIZE |
87 from gslib.util import GetFileSize | |
88 from gslib.util import GetMaxRetryDelay | 89 from gslib.util import GetMaxRetryDelay |
89 from gslib.util import GetNumRetries | 90 from gslib.util import GetNumRetries |
90 from gslib.util import MultiprocessingIsAvailable | |
91 from gslib.util import S3_DELETE_MARKER_GUID | 91 from gslib.util import S3_DELETE_MARKER_GUID |
92 from gslib.util import TWO_MIB | 92 from gslib.util import TWO_MIB |
93 from gslib.util import UnaryDictToXml | 93 from gslib.util import UnaryDictToXml |
94 from gslib.util import UTF8 | 94 from gslib.util import UTF8 |
95 from gslib.util import XML_PROGRESS_CALLBACKS | 95 from gslib.util import XML_PROGRESS_CALLBACKS |
96 | 96 |
97 TRANSLATABLE_BOTO_EXCEPTIONS = (boto.exception.BotoServerError, | 97 TRANSLATABLE_BOTO_EXCEPTIONS = (boto.exception.BotoServerError, |
98 boto.exception.InvalidUriError, | 98 boto.exception.InvalidUriError, |
99 boto.exception.ResumableDownloadException, | 99 boto.exception.ResumableDownloadException, |
100 boto.exception.ResumableUploadException, | 100 boto.exception.ResumableUploadException, |
101 boto.exception.StorageCreateError, | 101 boto.exception.StorageCreateError, |
102 boto.exception.StorageResponseError) | 102 boto.exception.StorageResponseError) |
103 | 103 |
104 # If multiprocessing is available, this will be overridden to a (thread-safe) | 104 # pylint: disable=global-at-module-level |
105 # multiprocessing.Value in a call to InitializeMultiprocessingVariables. | 105 global boto_auth_initialized, boto_auth_initialized_lock |
| 106 # If multiprocessing is available, these will be overridden to process-safe |
| 107 # variables in InitializeMultiprocessingVariables. |
| 108 boto_auth_initialized_lock = threading.Lock() |
106 boto_auth_initialized = False | 109 boto_auth_initialized = False |
107 | 110 |
108 NON_EXISTENT_OBJECT_REGEX = re.compile(r'.*non-\s*existent\s*object', | 111 NON_EXISTENT_OBJECT_REGEX = re.compile(r'.*non-\s*existent\s*object', |
109 flags=re.DOTALL) | 112 flags=re.DOTALL) |
110 # Determines whether an etag is a valid MD5. | 113 # Determines whether an etag is a valid MD5. |
111 MD5_REGEX = re.compile(r'^"*[a-fA-F0-9]{32}"*$') | 114 MD5_REGEX = re.compile(r'^"*[a-fA-F0-9]{32}"*$') |
112 | 115 |
113 | 116 |
114 def InitializeMultiprocessingVariables(): | 117 def InitializeMultiprocessingVariables(): # pylint: disable=invalid-name |
115 """Perform necessary initialization for multiprocessing. | 118 """Perform necessary initialization for multiprocessing. |
116 | 119 |
117 See gslib.command.InitializeMultiprocessingVariables for an explanation | 120 See gslib.command.InitializeMultiprocessingVariables for an explanation |
118 of why this is necessary. | 121 of why this is necessary. |
119 """ | 122 """ |
120 global boto_auth_initialized # pylint: disable=global-variable-undefined | 123 # pylint: disable=global-variable-undefined |
| 124 global boto_auth_initialized, boto_auth_initialized_lock |
| 125 boto_auth_initialized_lock = gslib.util.CreateLock() |
121 boto_auth_initialized = multiprocessing.Value('i', 0) | 126 boto_auth_initialized = multiprocessing.Value('i', 0) |
122 | 127 |
123 | 128 |
| 129 class DownloadProxyCallbackHandler(object): |
| 130 """Intermediary callback to keep track of the number of bytes downloaded.""" |
| 131 |
| 132 def __init__(self, start_byte, callback): |
| 133 self._start_byte = start_byte |
| 134 self._callback = callback |
| 135 |
| 136 def call(self, bytes_downloaded, total_size): |
| 137 """Saves necessary data and then calls the given Cloud API callback. |
| 138 |
| 139 Args: |
| 140 bytes_downloaded: Number of bytes processed so far. |
| 141 total_size: Total size of the ongoing operation. |
| 142 """ |
| 143 if self._callback: |
| 144 self._callback(self._start_byte + bytes_downloaded, total_size) |
| 145 |
| 146 |
124 class BotoTranslation(CloudApi): | 147 class BotoTranslation(CloudApi): |
125 """Boto-based XML translation implementation of gsutil Cloud API. | 148 """Boto-based XML translation implementation of gsutil Cloud API. |
126 | 149 |
127 This class takes gsutil Cloud API objects, translates them to XML service | 150 This class takes gsutil Cloud API objects, translates them to XML service |
128 calls, and translates the results back into gsutil Cloud API objects for | 151 calls, and translates the results back into gsutil Cloud API objects for |
129 use by the caller. | 152 use by the caller. |
130 """ | 153 """ |
131 | 154 |
132 def __init__(self, bucket_storage_uri_class, logger, provider=None, | 155 def __init__(self, bucket_storage_uri_class, logger, provider=None, |
133 credentials=None, debug=0): | 156 credentials=None, debug=0, trace_token=None): |
134 """Performs necessary setup for interacting with the cloud storage provider. | 157 """Performs necessary setup for interacting with the cloud storage provider. |
135 | 158 |
136 Args: | 159 Args: |
137 bucket_storage_uri_class: boto storage_uri class, used by APIs that | 160 bucket_storage_uri_class: boto storage_uri class, used by APIs that |
138 provide boto translation or mocking. | 161 provide boto translation or mocking. |
139 logger: logging.logger for outputting log messages. | 162 logger: logging.logger for outputting log messages. |
140 provider: Provider prefix describing cloud storage provider to connect to. | 163 provider: Provider prefix describing cloud storage provider to connect to. |
141 'gs' and 's3' are supported. Function implementations ignore | 164 'gs' and 's3' are supported. Function implementations ignore |
142 the provider argument and use this one instead. | 165 the provider argument and use this one instead. |
143 credentials: Unused. | 166 credentials: Unused. |
144 debug: Debug level for the API implementation (0..3). | 167 debug: Debug level for the API implementation (0..3). |
| 168 trace_token: Unused in this subclass. |
145 """ | 169 """ |
146 super(BotoTranslation, self).__init__(bucket_storage_uri_class, logger, | 170 super(BotoTranslation, self).__init__(bucket_storage_uri_class, logger, |
147 provider=provider, debug=debug) | 171 provider=provider, debug=debug) |
148 _ = credentials | 172 _ = credentials |
149 global boto_auth_initialized # pylint: disable=global-variable-undefined | 173 # pylint: disable=global-variable-undefined, global-variable-not-assigned |
150 if MultiprocessingIsAvailable()[0] and not boto_auth_initialized.value: | 174 global boto_auth_initialized, boto_auth_initialized_lock |
| 175 with boto_auth_initialized_lock: |
151 ConfigureNoOpAuthIfNeeded() | 176 ConfigureNoOpAuthIfNeeded() |
152 boto_auth_initialized.value = 1 | 177 if isinstance(boto_auth_initialized, bool): |
153 elif not boto_auth_initialized: | 178 boto_auth_initialized = True |
154 ConfigureNoOpAuthIfNeeded() | 179 else: |
155 boto_auth_initialized = True | 180 boto_auth_initialized.value = 1 |
156 self.api_version = boto.config.get_value( | 181 self.api_version = boto.config.get_value( |
157 'GSUtil', 'default_api_version', '1') | 182 'GSUtil', 'default_api_version', '1') |
158 | 183 |
159 def GetBucket(self, bucket_name, provider=None, fields=None): | 184 def GetBucket(self, bucket_name, provider=None, fields=None): |
160 """See CloudApi class for function doc strings.""" | 185 """See CloudApi class for function doc strings.""" |
161 _ = provider | 186 _ = provider |
162 bucket_uri = self._StorageUriForBucket(bucket_name) | 187 bucket_uri = self._StorageUriForBucket(bucket_name) |
163 headers = {} | 188 headers = {} |
164 self._AddApiVersionToHeaders(headers) | 189 self._AddApiVersionToHeaders(headers) |
165 try: | 190 try: |
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
311 raise NotFoundException('Bucket %s does not exist.' % bucket_name) | 336 raise NotFoundException('Bucket %s does not exist.' % bucket_name) |
312 else: | 337 else: |
313 self._TranslateExceptionAndRaise(e, bucket_name=bucket_name) | 338 self._TranslateExceptionAndRaise(e, bucket_name=bucket_name) |
314 | 339 |
315 def ListObjects(self, bucket_name, prefix=None, delimiter=None, | 340 def ListObjects(self, bucket_name, prefix=None, delimiter=None, |
316 all_versions=None, provider=None, fields=None): | 341 all_versions=None, provider=None, fields=None): |
317 """See CloudApi class for function doc strings.""" | 342 """See CloudApi class for function doc strings.""" |
318 _ = provider | 343 _ = provider |
319 get_fields = self._ListToGetFields(list_fields=fields) | 344 get_fields = self._ListToGetFields(list_fields=fields) |
320 bucket_uri = self._StorageUriForBucket(bucket_name) | 345 bucket_uri = self._StorageUriForBucket(bucket_name) |
321 prefix_list = [] | |
322 headers = {} | 346 headers = {} |
| 347 yield_prefixes = fields is None or 'prefixes' in fields |
| 348 yield_objects = fields is None or any( |
| 349 field.startswith('items/') for field in fields) |
323 self._AddApiVersionToHeaders(headers) | 350 self._AddApiVersionToHeaders(headers) |
324 try: | 351 try: |
325 objects_iter = bucket_uri.list_bucket(prefix=prefix or '', | 352 objects_iter = bucket_uri.list_bucket(prefix=prefix or '', |
326 delimiter=delimiter or '', | 353 delimiter=delimiter or '', |
327 all_versions=all_versions, | 354 all_versions=all_versions, |
328 headers=headers) | 355 headers=headers) |
329 except TRANSLATABLE_BOTO_EXCEPTIONS, e: | 356 except TRANSLATABLE_BOTO_EXCEPTIONS, e: |
330 self._TranslateExceptionAndRaise(e, bucket_name=bucket_name) | 357 self._TranslateExceptionAndRaise(e, bucket_name=bucket_name) |
331 | 358 |
332 try: | 359 try: |
333 for key in objects_iter: | 360 for key in objects_iter: |
334 if isinstance(key, Prefix): | 361 if yield_prefixes and isinstance(key, Prefix): |
335 prefix_list.append(key.name) | |
336 yield CloudApi.CsObjectOrPrefix(key.name, | 362 yield CloudApi.CsObjectOrPrefix(key.name, |
337 CloudApi.CsObjectOrPrefixType.PREFIX) | 363 CloudApi.CsObjectOrPrefixType.PREFIX) |
338 else: | 364 elif yield_objects: |
339 key_to_convert = key | 365 key_to_convert = key |
340 | 366 |
341 # Listed keys are populated with these fields during bucket listing. | 367 # Listed keys are populated with these fields during bucket listing. |
342 key_http_fields = set(['bucket', 'etag', 'name', 'updated', | 368 key_http_fields = set(['bucket', 'etag', 'name', 'updated', |
343 'generation', 'metageneration', 'size']) | 369 'generation', 'metageneration', 'size']) |
344 | 370 |
345 # When fields == None, the caller is requesting all possible fields. | 371 # When fields == None, the caller is requesting all possible fields. |
346 # If the caller requested any fields that are not populated by bucket | 372 # If the caller requested any fields that are not populated by bucket |
347 # listing, we'll need to make a separate HTTP call for each object to | 373 # listing, we'll need to make a separate HTTP call for each object to |
348 # get its metadata and populate the remaining fields with the result. | 374 # get its metadata and populate the remaining fields with the result. |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
400 download_strategy=CloudApi.DownloadStrategy.ONE_SHOT, | 426 download_strategy=CloudApi.DownloadStrategy.ONE_SHOT, |
401 start_byte=0, end_byte=None, progress_callback=None, | 427 start_byte=0, end_byte=None, progress_callback=None, |
402 serialization_data=None, digesters=None): | 428 serialization_data=None, digesters=None): |
403 """See CloudApi class for function doc strings.""" | 429 """See CloudApi class for function doc strings.""" |
404 # This implementation will get the object metadata first if we don't pass it | 430 # This implementation will get the object metadata first if we don't pass it |
405 # in via serialization_data. | 431 # in via serialization_data. |
406 headers = {} | 432 headers = {} |
407 self._AddApiVersionToHeaders(headers) | 433 self._AddApiVersionToHeaders(headers) |
408 if 'accept-encoding' not in headers: | 434 if 'accept-encoding' not in headers: |
409 headers['accept-encoding'] = 'gzip' | 435 headers['accept-encoding'] = 'gzip' |
410 if end_byte: | 436 if end_byte is not None: |
411 headers['range'] = 'bytes=%s-%s' % (start_byte, end_byte) | 437 headers['range'] = 'bytes=%s-%s' % (start_byte, end_byte) |
412 elif start_byte > 0: | 438 elif start_byte > 0: |
413 headers['range'] = 'bytes=%s-' % (start_byte) | 439 headers['range'] = 'bytes=%s-' % (start_byte) |
414 else: | 440 elif start_byte < 0: |
415 headers['range'] = 'bytes=%s' % (start_byte) | 441 headers['range'] = 'bytes=%s' % (start_byte) |
416 | 442 |
417 # Since in most cases we already made a call to get the object metadata, | 443 # Since in most cases we already made a call to get the object metadata, |
418 # here we avoid an extra HTTP call by unpickling the key. This is coupled | 444 # here we avoid an extra HTTP call by unpickling the key. This is coupled |
419 # with the implementation in _BotoKeyToObject. | 445 # with the implementation in _BotoKeyToObject. |
420 if serialization_data: | 446 if serialization_data: |
421 serialization_dict = json.loads(serialization_data) | 447 serialization_dict = json.loads(serialization_data) |
422 key = pickle.loads(binascii.a2b_base64(serialization_dict['url'])) | 448 key = pickle.loads(binascii.a2b_base64(serialization_dict['url'])) |
423 else: | 449 else: |
424 key = self._GetBotoKey(bucket_name, object_name, generation=generation) | 450 key = self._GetBotoKey(bucket_name, object_name, generation=generation) |
(...skipping 11 matching lines...) Expand all Loading... |
436 | 462 |
437 if total_size: | 463 if total_size: |
438 num_progress_callbacks = max(int(total_size) / TWO_MIB, | 464 num_progress_callbacks = max(int(total_size) / TWO_MIB, |
439 XML_PROGRESS_CALLBACKS) | 465 XML_PROGRESS_CALLBACKS) |
440 else: | 466 else: |
441 num_progress_callbacks = XML_PROGRESS_CALLBACKS | 467 num_progress_callbacks = XML_PROGRESS_CALLBACKS |
442 | 468 |
443 try: | 469 try: |
444 if download_strategy is CloudApi.DownloadStrategy.RESUMABLE: | 470 if download_strategy is CloudApi.DownloadStrategy.RESUMABLE: |
445 self._PerformResumableDownload( | 471 self._PerformResumableDownload( |
446 download_stream, key, headers=headers, callback=progress_callback, | 472 download_stream, start_byte, end_byte, key, |
| 473 headers=headers, callback=progress_callback, |
447 num_callbacks=num_progress_callbacks, hash_algs=hash_algs) | 474 num_callbacks=num_progress_callbacks, hash_algs=hash_algs) |
448 elif download_strategy is CloudApi.DownloadStrategy.ONE_SHOT: | 475 elif download_strategy is CloudApi.DownloadStrategy.ONE_SHOT: |
449 self._PerformSimpleDownload( | 476 self._PerformSimpleDownload( |
450 download_stream, key, progress_callback=progress_callback, | 477 download_stream, key, progress_callback=progress_callback, |
451 num_progress_callbacks=num_progress_callbacks, headers=headers, | 478 num_progress_callbacks=num_progress_callbacks, headers=headers, |
452 hash_algs=hash_algs) | 479 hash_algs=hash_algs) |
453 else: | 480 else: |
454 raise ArgumentException('Unsupported DownloadStrategy: %s' % | 481 raise ArgumentException('Unsupported DownloadStrategy: %s' % |
455 download_strategy) | 482 download_strategy) |
456 except TRANSLATABLE_BOTO_EXCEPTIONS, e: | 483 except TRANSLATABLE_BOTO_EXCEPTIONS, e: |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
498 headers = {} | 525 headers = {} |
499 self._AddApiVersionToHeaders(headers) | 526 self._AddApiVersionToHeaders(headers) |
500 try: | 527 try: |
501 key.get_contents_to_file(download_stream, cb=progress_callback, | 528 key.get_contents_to_file(download_stream, cb=progress_callback, |
502 num_cb=num_progress_callbacks, headers=headers, | 529 num_cb=num_progress_callbacks, headers=headers, |
503 hash_algs=hash_algs) | 530 hash_algs=hash_algs) |
504 except TypeError: # s3 and mocks do not support hash_algs | 531 except TypeError: # s3 and mocks do not support hash_algs |
505 key.get_contents_to_file(download_stream, cb=progress_callback, | 532 key.get_contents_to_file(download_stream, cb=progress_callback, |
506 num_cb=num_progress_callbacks, headers=headers) | 533 num_cb=num_progress_callbacks, headers=headers) |
507 | 534 |
508 def _PerformResumableDownload(self, fp, key, headers=None, callback=None, | 535 def _PerformResumableDownload(self, fp, start_byte, end_byte, key, |
| 536 headers=None, callback=None, |
509 num_callbacks=XML_PROGRESS_CALLBACKS, | 537 num_callbacks=XML_PROGRESS_CALLBACKS, |
510 hash_algs=None): | 538 hash_algs=None): |
511 """Downloads bytes from key to fp, resuming as needed. | 539 """Downloads bytes from key to fp, resuming as needed. |
512 | 540 |
513 Args: | 541 Args: |
514 fp: File pointer into which data should be downloaded | 542 fp: File pointer into which data should be downloaded. |
| 543 start_byte: Start byte of the download. |
| 544 end_byte: End byte of the download. |
515 key: Key object from which data is to be downloaded | 545 key: Key object from which data is to be downloaded |
516 headers: Headers to send when retrieving the file | 546 headers: Headers to send when retrieving the file |
517 callback: (optional) a callback function that will be called to report | 547 callback: (optional) a callback function that will be called to report |
518 progress on the download. The callback should accept two integer | 548 progress on the download. The callback should accept two integer |
519 parameters. The first integer represents the number of | 549 parameters. The first integer represents the number of |
520 bytes that have been successfully transmitted from the service. The | 550 bytes that have been successfully transmitted from the service. The |
521 second represents the total number of bytes that need to be | 551 second represents the total number of bytes that need to be |
522 transmitted. | 552 transmitted. |
523 num_callbacks: (optional) If a callback is specified with the callback | 553 num_callbacks: (optional) If a callback is specified with the callback |
524 parameter, this determines the granularity of the callback | 554 parameter, this determines the granularity of the callback |
525 by defining the maximum number of times the callback will be | 555 by defining the maximum number of times the callback will be |
526 called during the file transfer. | 556 called during the file transfer. |
527 hash_algs: Dict of hash algorithms to apply to downloaded bytes. | 557 hash_algs: Dict of hash algorithms to apply to downloaded bytes. |
528 | 558 |
529 Raises: | 559 Raises: |
530 ResumableDownloadException on error. | 560 ResumableDownloadException on error. |
531 """ | 561 """ |
532 if not headers: | 562 if not headers: |
533 headers = {} | 563 headers = {} |
534 self._AddApiVersionToHeaders(headers) | 564 self._AddApiVersionToHeaders(headers) |
535 | 565 |
536 retryable_exceptions = (httplib.HTTPException, IOError, socket.error, | 566 retryable_exceptions = (httplib.HTTPException, IOError, socket.error, |
537 socket.gaierror) | 567 socket.gaierror) |
538 | 568 |
539 debug = key.bucket.connection.debug | 569 debug = key.bucket.connection.debug |
540 | 570 |
541 num_retries = GetNumRetries() | 571 num_retries = GetNumRetries() |
542 progress_less_iterations = 0 | 572 progress_less_iterations = 0 |
| 573 last_progress_byte = start_byte |
543 | 574 |
544 while True: # Retry as long as we're making progress. | 575 while True: # Retry as long as we're making progress. |
545 had_file_bytes_before_attempt = GetFileSize(fp) | |
546 try: | 576 try: |
547 cur_file_size = GetFileSize(fp, position_to_eof=True) | 577 cb_handler = DownloadProxyCallbackHandler(start_byte, callback) |
548 | |
549 def DownloadProxyCallback(total_bytes_downloaded, total_size): | |
550 """Translates a boto callback into a gsutil Cloud API callback. | |
551 | |
552 Callbacks are originally made by boto.s3.Key.get_file(); here we take | |
553 into account that we're resuming a download. | |
554 | |
555 Args: | |
556 total_bytes_downloaded: Actual bytes downloaded so far, not | |
557 including the point we resumed from. | |
558 total_size: Total size of the download. | |
559 """ | |
560 if callback: | |
561 callback(cur_file_size + total_bytes_downloaded, total_size) | |
562 | |
563 headers = headers.copy() | 578 headers = headers.copy() |
564 headers['Range'] = 'bytes=%d-%d' % (cur_file_size, key.size - 1) | 579 headers['Range'] = 'bytes=%d-%d' % (start_byte, end_byte) |
565 cb = DownloadProxyCallback | |
566 | 580 |
567 # Disable AWSAuthConnection-level retry behavior, since that would | 581 # Disable AWSAuthConnection-level retry behavior, since that would |
568 # cause downloads to restart from scratch. | 582 # cause downloads to restart from scratch. |
569 try: | 583 try: |
570 key.get_file(fp, headers, cb, num_callbacks, override_num_retries=0, | 584 key.get_file(fp, headers, cb_handler.call, num_callbacks, |
571 hash_algs=hash_algs) | 585 override_num_retries=0, hash_algs=hash_algs) |
572 except TypeError: | 586 except TypeError: |
573 key.get_file(fp, headers, cb, num_callbacks, override_num_retries=0) | 587 key.get_file(fp, headers, cb_handler.call, num_callbacks, |
| 588 override_num_retries=0) |
574 fp.flush() | 589 fp.flush() |
575 # Download succeeded. | 590 # Download succeeded. |
576 return | 591 return |
577 except retryable_exceptions, e: | 592 except retryable_exceptions, e: |
578 if debug >= 1: | 593 if debug >= 1: |
579 self.logger.info('Caught exception (%s)', repr(e)) | 594 self.logger.info('Caught exception (%s)', repr(e)) |
580 if isinstance(e, IOError) and e.errno == errno.EPIPE: | 595 if isinstance(e, IOError) and e.errno == errno.EPIPE: |
581 # Broken pipe error causes httplib to immediately | 596 # Broken pipe error causes httplib to immediately |
582 # close the socket (http://bugs.python.org/issue5542), | 597 # close the socket (http://bugs.python.org/issue5542), |
583 # so we need to close and reopen the key before resuming | 598 # so we need to close and reopen the key before resuming |
584 # the download. | 599 # the download. |
585 if self.provider == 's3': | 600 if self.provider == 's3': |
586 key.get_file(fp, headers, cb, num_callbacks, override_num_retries=0) | 601 key.get_file(fp, headers, cb_handler.call, num_callbacks, |
| 602 override_num_retries=0) |
587 else: # self.provider == 'gs' | 603 else: # self.provider == 'gs' |
588 key.get_file(fp, headers, cb, num_callbacks, | 604 key.get_file(fp, headers, cb_handler.call, num_callbacks, |
589 override_num_retries=0, hash_algs=hash_algs) | 605 override_num_retries=0, hash_algs=hash_algs) |
590 except BotoResumableDownloadException, e: | 606 except BotoResumableDownloadException, e: |
591 if (e.disposition == | 607 if (e.disposition == |
592 ResumableTransferDisposition.ABORT_CUR_PROCESS): | 608 ResumableTransferDisposition.ABORT_CUR_PROCESS): |
593 raise ResumableDownloadException(e.message) | 609 raise ResumableDownloadException(e.message) |
594 else: | 610 else: |
595 if debug >= 1: | 611 if debug >= 1: |
596 self.logger.info('Caught ResumableDownloadException (%s) - will ' | 612 self.logger.info('Caught ResumableDownloadException (%s) - will ' |
597 'retry', e.message) | 613 'retry', e.message) |
598 | 614 |
599 # At this point we had a re-tryable failure; see if made progress. | 615 # At this point we had a re-tryable failure; see if made progress. |
600 if GetFileSize(fp) > had_file_bytes_before_attempt: | 616 start_byte = fp.tell() |
| 617 if start_byte > last_progress_byte: |
| 618 last_progress_byte = start_byte |
601 progress_less_iterations = 0 | 619 progress_less_iterations = 0 |
602 else: | 620 else: |
603 progress_less_iterations += 1 | 621 progress_less_iterations += 1 |
604 | 622 |
605 if progress_less_iterations > num_retries: | 623 if progress_less_iterations > num_retries: |
606 # Don't retry any longer in the current process. | 624 # Don't retry any longer in the current process. |
607 raise ResumableDownloadException( | 625 raise ResumableDownloadException( |
608 'Too many resumable download attempts failed without ' | 626 'Too many resumable download attempts failed without ' |
609 'progress. You might try this download again later') | 627 'progress. You might try this download again later') |
610 | 628 |
(...skipping 201 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
812 try: | 830 try: |
813 self._PerformResumableUpload(dst_uri.new_key(headers=headers), | 831 self._PerformResumableUpload(dst_uri.new_key(headers=headers), |
814 upload_stream, size, tracker_callback, | 832 upload_stream, size, tracker_callback, |
815 canned_acl=canned_acl, | 833 canned_acl=canned_acl, |
816 serialization_data=serialization_data, | 834 serialization_data=serialization_data, |
817 progress_callback=progress_callback, | 835 progress_callback=progress_callback, |
818 headers=headers) | 836 headers=headers) |
819 return self._HandleSuccessfulUpload(dst_uri, object_metadata, | 837 return self._HandleSuccessfulUpload(dst_uri, object_metadata, |
820 fields=fields) | 838 fields=fields) |
821 except TRANSLATABLE_BOTO_EXCEPTIONS, e: | 839 except TRANSLATABLE_BOTO_EXCEPTIONS, e: |
| 840 not_found_exception = CreateNotFoundExceptionForObjectWrite( |
| 841 self.provider, object_metadata.bucket) |
822 self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket, | 842 self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket, |
823 object_name=object_metadata.name) | 843 object_name=object_metadata.name, |
| 844 not_found_exception=not_found_exception) |
824 | 845 |
825 def UploadObjectStreaming(self, upload_stream, object_metadata, | 846 def UploadObjectStreaming(self, upload_stream, object_metadata, |
826 canned_acl=None, progress_callback=None, | 847 canned_acl=None, progress_callback=None, |
827 preconditions=None, provider=None, fields=None): | 848 preconditions=None, provider=None, fields=None): |
828 """See CloudApi class for function doc strings.""" | 849 """See CloudApi class for function doc strings.""" |
829 headers, dst_uri = self._UploadSetup(object_metadata, | 850 headers, dst_uri = self._UploadSetup(object_metadata, |
830 preconditions=preconditions) | 851 preconditions=preconditions) |
831 | 852 |
832 try: | 853 try: |
833 self._PerformStreamingUpload( | 854 self._PerformStreamingUpload( |
834 dst_uri, upload_stream, canned_acl=canned_acl, | 855 dst_uri, upload_stream, canned_acl=canned_acl, |
835 progress_callback=progress_callback, headers=headers) | 856 progress_callback=progress_callback, headers=headers) |
836 return self._HandleSuccessfulUpload(dst_uri, object_metadata, | 857 return self._HandleSuccessfulUpload(dst_uri, object_metadata, |
837 fields=fields) | 858 fields=fields) |
838 except TRANSLATABLE_BOTO_EXCEPTIONS, e: | 859 except TRANSLATABLE_BOTO_EXCEPTIONS, e: |
| 860 not_found_exception = CreateNotFoundExceptionForObjectWrite( |
| 861 self.provider, object_metadata.bucket) |
839 self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket, | 862 self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket, |
840 object_name=object_metadata.name) | 863 object_name=object_metadata.name, |
| 864 not_found_exception=not_found_exception) |
841 | 865 |
842 def UploadObject(self, upload_stream, object_metadata, canned_acl=None, | 866 def UploadObject(self, upload_stream, object_metadata, canned_acl=None, |
843 preconditions=None, size=None, progress_callback=None, | 867 preconditions=None, size=None, progress_callback=None, |
844 provider=None, fields=None): | 868 provider=None, fields=None): |
845 """See CloudApi class for function doc strings.""" | 869 """See CloudApi class for function doc strings.""" |
846 headers, dst_uri = self._UploadSetup(object_metadata, | 870 headers, dst_uri = self._UploadSetup(object_metadata, |
847 preconditions=preconditions) | 871 preconditions=preconditions) |
848 | 872 |
849 try: | 873 try: |
850 md5 = None | 874 md5 = None |
851 if object_metadata.md5Hash: | 875 if object_metadata.md5Hash: |
852 md5 = [] | 876 md5 = [] |
853 # boto expects hex at index 0, base64 at index 1 | 877 # boto expects hex at index 0, base64 at index 1 |
854 md5.append(Base64ToHexHash(object_metadata.md5Hash)) | 878 md5.append(Base64ToHexHash(object_metadata.md5Hash)) |
855 md5.append(object_metadata.md5Hash.strip('\n"\'')) | 879 md5.append(object_metadata.md5Hash.strip('\n"\'')) |
856 self._PerformSimpleUpload(dst_uri, upload_stream, md5=md5, | 880 self._PerformSimpleUpload(dst_uri, upload_stream, md5=md5, |
857 canned_acl=canned_acl, | 881 canned_acl=canned_acl, |
858 progress_callback=progress_callback, | 882 progress_callback=progress_callback, |
859 headers=headers) | 883 headers=headers) |
860 return self._HandleSuccessfulUpload(dst_uri, object_metadata, | 884 return self._HandleSuccessfulUpload(dst_uri, object_metadata, |
861 fields=fields) | 885 fields=fields) |
862 except TRANSLATABLE_BOTO_EXCEPTIONS, e: | 886 except TRANSLATABLE_BOTO_EXCEPTIONS, e: |
| 887 not_found_exception = CreateNotFoundExceptionForObjectWrite( |
| 888 self.provider, object_metadata.bucket) |
863 self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket, | 889 self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket, |
864 object_name=object_metadata.name) | 890 object_name=object_metadata.name, |
| 891 not_found_exception=not_found_exception) |
865 | 892 |
866 def DeleteObject(self, bucket_name, object_name, preconditions=None, | 893 def DeleteObject(self, bucket_name, object_name, preconditions=None, |
867 generation=None, provider=None): | 894 generation=None, provider=None): |
868 """See CloudApi class for function doc strings.""" | 895 """See CloudApi class for function doc strings.""" |
869 _ = provider | 896 _ = provider |
870 headers = {} | 897 headers = {} |
871 self._AddApiVersionToHeaders(headers) | 898 self._AddApiVersionToHeaders(headers) |
872 self._AddPreconditionsToHeaders(preconditions, headers) | 899 self._AddPreconditionsToHeaders(preconditions, headers) |
873 | 900 |
874 uri = self._StorageUriForObject(bucket_name, object_name, | 901 uri = self._StorageUriForObject(bucket_name, object_name, |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
913 preserve_acl = True | 940 preserve_acl = True |
914 | 941 |
915 try: | 942 try: |
916 new_key = dst_uri.copy_key( | 943 new_key = dst_uri.copy_key( |
917 src_obj_metadata.bucket, src_obj_metadata.name, | 944 src_obj_metadata.bucket, src_obj_metadata.name, |
918 preserve_acl=preserve_acl, headers=headers, | 945 preserve_acl=preserve_acl, headers=headers, |
919 src_version_id=src_version_id, src_generation=src_generation) | 946 src_version_id=src_version_id, src_generation=src_generation) |
920 | 947 |
921 return self._BotoKeyToObject(new_key, fields=fields) | 948 return self._BotoKeyToObject(new_key, fields=fields) |
922 except TRANSLATABLE_BOTO_EXCEPTIONS, e: | 949 except TRANSLATABLE_BOTO_EXCEPTIONS, e: |
923 self._TranslateExceptionAndRaise(e, dst_obj_metadata.bucket, | 950 not_found_exception = CreateNotFoundExceptionForObjectWrite( |
924 dst_obj_metadata.name) | 951 self.provider, dst_obj_metadata.bucket, src_provider=self.provider, |
| 952 src_bucket_name=src_obj_metadata.bucket, |
| 953 src_object_name=src_obj_metadata.name, src_generation=src_generation) |
| 954 self._TranslateExceptionAndRaise(e, bucket_name=dst_obj_metadata.bucket, |
| 955 object_name=dst_obj_metadata.name, |
| 956 not_found_exception=not_found_exception) |
925 | 957 |
926 def ComposeObject(self, src_objs_metadata, dst_obj_metadata, | 958 def ComposeObject(self, src_objs_metadata, dst_obj_metadata, |
927 preconditions=None, provider=None, fields=None): | 959 preconditions=None, provider=None, fields=None): |
928 """See CloudApi class for function doc strings.""" | 960 """See CloudApi class for function doc strings.""" |
929 _ = provider | 961 _ = provider |
930 ValidateDstObjectMetadata(dst_obj_metadata) | 962 ValidateDstObjectMetadata(dst_obj_metadata) |
931 | 963 |
932 dst_obj_name = dst_obj_metadata.name | 964 dst_obj_name = dst_obj_metadata.name |
933 dst_obj_metadata.name = None | 965 dst_obj_metadata.name = None |
934 dst_bucket_name = dst_obj_metadata.bucket | 966 dst_bucket_name = dst_obj_metadata.bucket |
(...skipping 440 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1375 except boto.exception.GSResponseError, e: | 1407 except boto.exception.GSResponseError, e: |
1376 if e.status == 403: | 1408 if e.status == 403: |
1377 # Consume access denied exceptions to mimic JSON behavior of simply | 1409 # Consume access denied exceptions to mimic JSON behavior of simply |
1378 # returning None if sufficient permission is not present. The caller | 1410 # returning None if sufficient permission is not present. The caller |
1379 # needs to handle the case where the ACL is not populated. | 1411 # needs to handle the case where the ACL is not populated. |
1380 pass | 1412 pass |
1381 else: | 1413 else: |
1382 raise | 1414 raise |
1383 | 1415 |
1384 def _TranslateExceptionAndRaise(self, e, bucket_name=None, object_name=None, | 1416 def _TranslateExceptionAndRaise(self, e, bucket_name=None, object_name=None, |
1385 generation=None): | 1417 generation=None, not_found_exception=None): |
1386 """Translates a Boto exception and raises the translated or original value. | 1418 """Translates a Boto exception and raises the translated or original value. |
1387 | 1419 |
1388 Args: | 1420 Args: |
1389 e: Any Exception. | 1421 e: Any Exception. |
1390 bucket_name: Optional bucket name in request that caused the exception. | 1422 bucket_name: Optional bucket name in request that caused the exception. |
1391 object_name: Optional object name in request that caused the exception. | 1423 object_name: Optional object name in request that caused the exception. |
1392 generation: Optional generation in request that caused the exception. | 1424 generation: Optional generation in request that caused the exception. |
| 1425 not_found_exception: Optional exception to raise in the not-found case. |
1393 | 1426 |
1394 Raises: | 1427 Raises: |
1395 Translated CloudApi exception, or the original exception if it was not | 1428 Translated CloudApi exception, or the original exception if it was not |
1396 translatable. | 1429 translatable. |
1397 """ | 1430 """ |
1398 translated_exception = self._TranslateBotoException( | 1431 translated_exception = self._TranslateBotoException( |
1399 e, bucket_name=bucket_name, object_name=object_name, | 1432 e, bucket_name=bucket_name, object_name=object_name, |
1400 generation=generation) | 1433 generation=generation, not_found_exception=not_found_exception) |
1401 if translated_exception: | 1434 if translated_exception: |
1402 raise translated_exception | 1435 raise translated_exception |
1403 else: | 1436 else: |
1404 raise | 1437 raise |
1405 | 1438 |
1406 def _TranslateBotoException(self, e, bucket_name=None, object_name=None, | 1439 def _TranslateBotoException(self, e, bucket_name=None, object_name=None, |
1407 generation=None): | 1440 generation=None, not_found_exception=None): |
1408 """Translates boto exceptions into their gsutil Cloud API equivalents. | 1441 """Translates boto exceptions into their gsutil Cloud API equivalents. |
1409 | 1442 |
1410 Args: | 1443 Args: |
1411 e: Any exception in TRANSLATABLE_BOTO_EXCEPTIONS. | 1444 e: Any exception in TRANSLATABLE_BOTO_EXCEPTIONS. |
1412 bucket_name: Optional bucket name in request that caused the exception. | 1445 bucket_name: Optional bucket name in request that caused the exception. |
1413 object_name: Optional object name in request that caused the exception. | 1446 object_name: Optional object name in request that caused the exception. |
1414 generation: Optional generation in request that caused the exception. | 1447 generation: Optional generation in request that caused the exception. |
| 1448 not_found_exception: Optional exception to raise in the not-found case. |
1415 | 1449 |
1416 Returns: | 1450 Returns: |
1417 CloudStorageApiServiceException for translatable exceptions, None | 1451 CloudStorageApiServiceException for translatable exceptions, None |
1418 otherwise. | 1452 otherwise. |
1419 | 1453 |
1420 Because we're using isinstance, check for subtypes first. | 1454 Because we're using isinstance, check for subtypes first. |
1421 """ | 1455 """ |
1422 if isinstance(e, boto.exception.StorageResponseError): | 1456 if isinstance(e, boto.exception.StorageResponseError): |
1423 if e.status == 400: | 1457 if e.status == 400: |
1424 return BadRequestException(e.code, status=e.status, body=e.body) | 1458 return BadRequestException(e.code, status=e.status, body=e.body) |
1425 elif e.status == 401 or e.status == 403: | 1459 elif e.status == 401 or e.status == 403: |
1426 return AccessDeniedException(e.code, status=e.status, body=e.body) | 1460 return AccessDeniedException(e.code, status=e.status, body=e.body) |
1427 elif e.status == 404: | 1461 elif e.status == 404: |
1428 if bucket_name: | 1462 if not_found_exception: |
| 1463 # The exception is pre-constructed prior to translation; the HTTP |
| 1464 # status code isn't available at that time. |
| 1465 setattr(not_found_exception, 'status', e.status) |
| 1466 return not_found_exception |
| 1467 elif bucket_name: |
1429 if object_name: | 1468 if object_name: |
1430 return CreateObjectNotFoundException(e.status, self.provider, | 1469 return CreateObjectNotFoundException(e.status, self.provider, |
1431 bucket_name, object_name, | 1470 bucket_name, object_name, |
1432 generation=generation) | 1471 generation=generation) |
1433 return CreateBucketNotFoundException(e.status, self.provider, | 1472 return CreateBucketNotFoundException(e.status, self.provider, |
1434 bucket_name) | 1473 bucket_name) |
1435 return NotFoundException(e.code, status=e.status, body=e.body) | 1474 return NotFoundException(e.message, status=e.status, body=e.body) |
| 1475 |
1436 elif e.status == 409 and e.code and 'BucketNotEmpty' in e.code: | 1476 elif e.status == 409 and e.code and 'BucketNotEmpty' in e.code: |
1437 return NotEmptyException('BucketNotEmpty (%s)' % bucket_name, | 1477 return NotEmptyException('BucketNotEmpty (%s)' % bucket_name, |
1438 status=e.status, body=e.body) | 1478 status=e.status, body=e.body) |
1439 elif e.status == 410: | 1479 elif e.status == 410: |
1440 # 410 errors should always cause us to start over - either the UploadID | 1480 # 410 errors should always cause us to start over - either the UploadID |
1441 # has expired or there was a server-side problem that requires starting | 1481 # has expired or there was a server-side problem that requires starting |
1442 # the upload over from scratch. | 1482 # the upload over from scratch. |
1443 return ResumableUploadStartOverException(e.message) | 1483 return ResumableUploadStartOverException(e.message) |
1444 elif e.status == 412: | 1484 elif e.status == 412: |
1445 return PreconditionException(e.code, status=e.status, body=e.body) | 1485 return PreconditionException(e.code, status=e.status, body=e.body) |
(...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1609 try: | 1649 try: |
1610 uri = boto.storage_uri( | 1650 uri = boto.storage_uri( |
1611 storage_url.url_string, suppress_consec_slashes=False, | 1651 storage_url.url_string, suppress_consec_slashes=False, |
1612 bucket_storage_uri_class=self.bucket_storage_uri_class, | 1652 bucket_storage_uri_class=self.bucket_storage_uri_class, |
1613 debug=self.debug) | 1653 debug=self.debug) |
1614 web_config_xml = UnaryDictToXml(uri.get_website_config()) | 1654 web_config_xml = UnaryDictToXml(uri.get_website_config()) |
1615 except TRANSLATABLE_BOTO_EXCEPTIONS, e: | 1655 except TRANSLATABLE_BOTO_EXCEPTIONS, e: |
1616 self._TranslateExceptionAndRaise(e) | 1656 self._TranslateExceptionAndRaise(e) |
1617 | 1657 |
1618 return XmlParseString(web_config_xml).toprettyxml() | 1658 return XmlParseString(web_config_xml).toprettyxml() |
OLD | NEW |