Chromium Code Reviews

Unified Diff: tools/telemetry/third_party/gsutil/third_party/boto/boto/s3/key.py

Issue 1260493004: Revert "Add gsutil 4.13 to telemetry/third_party" (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 5 months ago
Index: tools/telemetry/third_party/gsutil/third_party/boto/boto/s3/key.py
diff --git a/tools/telemetry/third_party/gsutil/third_party/boto/boto/s3/key.py b/tools/telemetry/third_party/gsutil/third_party/boto/boto/s3/key.py
deleted file mode 100644
index 194c6b6e9334d0b25e16d15ba7e0699628615e0c..0000000000000000000000000000000000000000
--- a/tools/telemetry/third_party/gsutil/third_party/boto/boto/s3/key.py
+++ /dev/null
@@ -1,1921 +0,0 @@
-# Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/
-# Copyright (c) 2011, Nexenta Systems Inc.
-# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish, dis-
-# tribute, sublicense, and/or sell copies of the Software, and to permit
-# persons to whom the Software is furnished to do so, subject to the fol-
-# lowing conditions:
-#
-# The above copyright notice and this permission notice shall be included
-# in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
-# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
-# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-import email.utils
-import errno
-import hashlib
-import mimetypes
-import os
-import re
-import base64
-import binascii
-import math
-from hashlib import md5
-import boto.utils
-from boto.compat import BytesIO, six, urllib, encodebytes
-
-from boto.exception import BotoClientError
-from boto.exception import StorageDataError
-from boto.exception import PleaseRetryException
-from boto.provider import Provider
-from boto.s3.keyfile import KeyFile
-from boto.s3.user import User
-from boto import UserAgent
-from boto.utils import compute_md5, compute_hash
-from boto.utils import find_matching_headers
-from boto.utils import merge_headers_by_name
-
-
-class Key(object):
- """
- Represents a key (object) in an S3 bucket.
-
- :ivar bucket: The parent :class:`boto.s3.bucket.Bucket`.
- :ivar name: The name of this Key object.
- :ivar metadata: A dictionary containing user metadata that you
- wish to store with the object or that has been retrieved from
- an existing object.
- :ivar cache_control: The value of the `Cache-Control` HTTP header.
- :ivar content_type: The value of the `Content-Type` HTTP header.
- :ivar content_encoding: The value of the `Content-Encoding` HTTP header.
- :ivar content_disposition: The value of the `Content-Disposition` HTTP
- header.
- :ivar content_language: The value of the `Content-Language` HTTP header.
- :ivar etag: The `etag` associated with this object.
- :ivar last_modified: The string timestamp representing the last
- time this object was modified in S3.
- :ivar owner: The ID of the owner of this object.
- :ivar storage_class: The storage class of the object. Currently, one of:
- STANDARD | REDUCED_REDUNDANCY | GLACIER
- :ivar md5: The MD5 hash of the contents of the object.
- :ivar size: The size, in bytes, of the object.
- :ivar version_id: The version ID of this object, if it is a versioned
- object.
- :ivar encrypted: Whether the object is encrypted while at rest on
- the server.
- """
-
- DefaultContentType = 'application/octet-stream'
-
- RestoreBody = """<?xml version="1.0" encoding="UTF-8"?>
- <RestoreRequest xmlns="http://s3.amazonaws.com/doc/2006-03-01">
- <Days>%s</Days>
- </RestoreRequest>"""
-
-
- BufferSize = boto.config.getint('Boto', 'key_buffer_size', 8192)
-
- # The object metadata fields a user can set, other than custom metadata
- # fields (i.e., those beginning with a provider-specific prefix like
- # x-amz-meta).
- base_user_settable_fields = set(["cache-control", "content-disposition",
- "content-encoding", "content-language",
- "content-md5", "content-type",
- "x-robots-tag", "expires"])
- _underscore_base_user_settable_fields = set()
- for f in base_user_settable_fields:
- _underscore_base_user_settable_fields.add(f.replace('-', '_'))
- # Metadata fields, whether user-settable or not, other than custom
- # metadata fields (i.e., those beginning with a provider specific prefix
- # like x-amz-meta).
- base_fields = (base_user_settable_fields |
- set(["last-modified", "content-length", "date", "etag"]))
-
-
-
- def __init__(self, bucket=None, name=None):
- self.bucket = bucket
- self.name = name
- self.metadata = {}
- self.cache_control = None
- self.content_type = self.DefaultContentType
- self.content_encoding = None
- self.content_disposition = None
- self.content_language = None
- self.filename = None
- self.etag = None
- self.is_latest = False
- self.last_modified = None
- self.owner = None
- self._storage_class = None
- self.path = None
- self.resp = None
- self.mode = None
- self.size = None
- self.version_id = None
- self.source_version_id = None
- self.delete_marker = False
- self.encrypted = None
- # If the object is being restored, this attribute will be set to True.
- # If the object is restored, it will be set to False. Otherwise this
- # value will be None. If the restore is completed (ongoing_restore =
- # False), the expiry_date will be populated with the expiry date of the
- # restored object.
- self.ongoing_restore = None
- self.expiry_date = None
- self.local_hashes = {}
-
- def __repr__(self):
- if self.bucket:
- name = u'<Key: %s,%s>' % (self.bucket.name, self.name)
- else:
- name = u'<Key: None,%s>' % self.name
-
- # Encode to bytes for Python 2 to prevent display decoding issues
- if not isinstance(name, str):
- name = name.encode('utf-8')
-
- return name
-
- def __iter__(self):
- return self
-
- @property
- def provider(self):
- provider = None
- if self.bucket and self.bucket.connection:
- provider = self.bucket.connection.provider
- return provider
-
- def _get_key(self):
- return self.name
-
- def _set_key(self, value):
- self.name = value
-
- key = property(_get_key, _set_key)
-
- def _get_md5(self):
- if 'md5' in self.local_hashes and self.local_hashes['md5']:
- return binascii.b2a_hex(self.local_hashes['md5'])
-
- def _set_md5(self, value):
- if value:
- self.local_hashes['md5'] = binascii.a2b_hex(value)
- elif 'md5' in self.local_hashes:
- self.local_hashes.pop('md5', None)
-
- md5 = property(_get_md5, _set_md5)
-
- def _get_base64md5(self):
- if 'md5' in self.local_hashes and self.local_hashes['md5']:
- md5 = self.local_hashes['md5']
- if not isinstance(md5, bytes):
- md5 = md5.encode('utf-8')
- return binascii.b2a_base64(md5).decode('utf-8').rstrip('\n')
-
- def _set_base64md5(self, value):
- if value:
- if not isinstance(value, six.string_types):
- value = value.decode('utf-8')
- self.local_hashes['md5'] = binascii.a2b_base64(value)
- elif 'md5' in self.local_hashes:
- del self.local_hashes['md5']
-
- base64md5 = property(_get_base64md5, _set_base64md5)
-
- def _get_storage_class(self):
- if self._storage_class is None and self.bucket:
- # Attempt to fetch storage class
- list_items = list(self.bucket.list(self.name.encode('utf-8')))
- if len(list_items) and getattr(list_items[0], '_storage_class',
- None):
- self._storage_class = list_items[0]._storage_class
- else:
- # Key is not yet saved? Just use default...
- self._storage_class = 'STANDARD'
-
- return self._storage_class
-
- def _set_storage_class(self, value):
- self._storage_class = value
-
- storage_class = property(_get_storage_class, _set_storage_class)
-
- def get_md5_from_hexdigest(self, md5_hexdigest):
- """
- A utility function to create the 2-tuple (md5hexdigest, base64md5)
- from just having a precalculated md5_hexdigest.
- """
- digest = binascii.unhexlify(md5_hexdigest)
- base64md5 = encodebytes(digest)
- if base64md5[-1] == '\n':
- base64md5 = base64md5[0:-1]
- return (md5_hexdigest, base64md5)
-
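A minimal usage sketch for the helper above, pairing a precomputed hex digest with the (hexdigest, base64md5) tuple the upload methods accept; the bucket and object names are hypothetical:

    import hashlib
    import boto

    conn = boto.connect_s3()
    key = conn.get_bucket('example-bucket').new_key('example-object')
    hexdigest = hashlib.md5(b'hello world').hexdigest()
    # Same (hexdigest, base64md5) format that compute_md5() returns.
    md5_tuple = key.get_md5_from_hexdigest(hexdigest)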
- def handle_encryption_headers(self, resp):
- provider = self.bucket.connection.provider
- if provider.server_side_encryption_header:
- self.encrypted = resp.getheader(
- provider.server_side_encryption_header, None)
- else:
- self.encrypted = None
-
- def handle_version_headers(self, resp, force=False):
- provider = self.bucket.connection.provider
- # If the Key object already has a version_id attribute value, it
- # means that it represents an explicit version and the user is
- # doing a get_contents_*(version_id=<foo>) to retrieve another
- # version of the Key. In that case, we don't really want to
- # overwrite the version_id in this Key object. Comprende?
- if self.version_id is None or force:
- self.version_id = resp.getheader(provider.version_id, None)
- self.source_version_id = resp.getheader(provider.copy_source_version_id,
- None)
- if resp.getheader(provider.delete_marker, 'false') == 'true':
- self.delete_marker = True
- else:
- self.delete_marker = False
-
- def handle_restore_headers(self, response):
- provider = self.bucket.connection.provider
- header = response.getheader(provider.restore_header)
- if header is None:
- return
- parts = header.split(',', 1)
- for part in parts:
- key, val = [i.strip() for i in part.split('=')]
- val = val.replace('"', '')
- if key == 'ongoing-request':
- self.ongoing_restore = True if val.lower() == 'true' else False
- elif key == 'expiry-date':
- self.expiry_date = val
-
- def handle_addl_headers(self, headers):
- """
- Used by Key subclasses to do additional, provider-specific
- processing of response headers. No-op for this base class.
- """
- pass
-
- def open_read(self, headers=None, query_args='',
- override_num_retries=None, response_headers=None):
- """
- Open this key for reading
-
- :type headers: dict
- :param headers: Headers to pass in the web request
-
- :type query_args: string
- :param query_args: Arguments to pass in the query string
- (ie, 'torrent')
-
- :type override_num_retries: int
- :param override_num_retries: If not None will override configured
- num_retries parameter for underlying GET.
-
- :type response_headers: dict
- :param response_headers: A dictionary containing HTTP
- headers/values that will override any headers associated
- with the stored object in the response. See
- http://goo.gl/EWOPb for details.
- """
- if self.resp is None:
- self.mode = 'r'
-
- provider = self.bucket.connection.provider
- self.resp = self.bucket.connection.make_request(
- 'GET', self.bucket.name, self.name, headers,
- query_args=query_args,
- override_num_retries=override_num_retries)
- if self.resp.status < 200 or self.resp.status > 299:
- body = self.resp.read()
- raise provider.storage_response_error(self.resp.status,
- self.resp.reason, body)
- response_headers = self.resp.msg
- self.metadata = boto.utils.get_aws_metadata(response_headers,
- provider)
- for name, value in response_headers.items():
- # To get correct size for Range GETs, use Content-Range
- # header if one was returned. If not, use Content-Length
- # header.
- if (name.lower() == 'content-length' and
- 'Content-Range' not in response_headers):
- self.size = int(value)
- elif name.lower() == 'content-range':
- end_range = re.sub('.*/(.*)', '\\1', value)
- self.size = int(end_range)
- elif name.lower() in Key.base_fields:
- self.__dict__[name.lower().replace('-', '_')] = value
- self.handle_version_headers(self.resp)
- self.handle_encryption_headers(self.resp)
- self.handle_restore_headers(self.resp)
- self.handle_addl_headers(self.resp.getheaders())
-
- def open_write(self, headers=None, override_num_retries=None):
- """
- Open this key for writing.
- Not yet implemented
-
- :type headers: dict
- :param headers: Headers to pass in the write request
-
- :type override_num_retries: int
- :param override_num_retries: If not None will override configured
- num_retries parameter for underlying PUT.
- """
- raise BotoClientError('Not Implemented')
-
- def open(self, mode='r', headers=None, query_args=None,
- override_num_retries=None):
- if mode == 'r':
- self.mode = 'r'
- self.open_read(headers=headers, query_args=query_args,
- override_num_retries=override_num_retries)
- elif mode == 'w':
- self.mode = 'w'
- self.open_write(headers=headers,
- override_num_retries=override_num_retries)
- else:
- raise BotoClientError('Invalid mode: %s' % mode)
-
- closed = False
-
- def close(self, fast=False):
- """
- Close this key.
-
- :type fast: bool
- :param fast: True if you want the connection to be closed without first
- reading the content. This should only be used in cases where subsequent
- calls don't need to return the content from the open HTTP connection.
- Note: As explained at
- http://docs.python.org/2/library/httplib.html#httplib.HTTPConnection.getresponse,
- callers must read the whole response before sending a new request to the
- server. Calling Key.close(fast=True) and making a subsequent request to
- the server will work because boto will get an httplib exception and
- close/reopen the connection.
-
- """
- if self.resp and not fast:
- self.resp.read()
- self.resp = None
- self.mode = None
- self.closed = True
-
- def next(self):
- """
- By providing a next method, the key object supports use as an iterator.
- For example, you can now say:
-
- for bytes in key:
- write bytes to a file or whatever
-
- All of the HTTP connection stuff is handled for you.
- """
- self.open_read()
- data = self.resp.read(self.BufferSize)
- if not data:
- self.close()
- raise StopIteration
- return data
-
- # Python 3 iterator support
- __next__ = next
-
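Because the class implements the iterator protocol above, an object can be streamed to disk in BufferSize chunks without buffering it in memory. A sketch, assuming valid credentials and hypothetical names:

    import boto

    key = boto.connect_s3().get_bucket('example-bucket').get_key('big-object')
    with open('/tmp/big-object', 'wb') as out:
        # Each iteration yields up to Key.BufferSize bytes from the open
        # HTTP response; next() closes the connection at EOF.
        for chunk in key:
            out.write(chunk)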
- def read(self, size=0):
- self.open_read()
- if size == 0:
- data = self.resp.read()
- else:
- data = self.resp.read(size)
- if not data:
- self.close()
- return data
-
- def change_storage_class(self, new_storage_class, dst_bucket=None,
- validate_dst_bucket=True):
- """
- Change the storage class of an existing key.
- Depending on whether a different destination bucket is supplied,
- this will either move the item within the bucket, preserving all
- metadata and ACL info while changing the storage class, or it
- will copy the item to the provided destination bucket, also
- preserving metadata and ACL info.
-
- :type new_storage_class: string
- :param new_storage_class: The new storage class for the Key.
- Possible values are:
- * STANDARD
- * REDUCED_REDUNDANCY
-
- :type dst_bucket: string
- :param dst_bucket: The name of a destination bucket. If not
- provided the current bucket of the key will be used.
-
- :type validate_dst_bucket: bool
- :param validate_dst_bucket: If True, will validate the dst_bucket
- by using an extra list request.
- """
- bucket_name = dst_bucket or self.bucket.name
- if new_storage_class == 'STANDARD':
- return self.copy(bucket_name, self.name,
- reduced_redundancy=False, preserve_acl=True,
- validate_dst_bucket=validate_dst_bucket)
- elif new_storage_class == 'REDUCED_REDUNDANCY':
- return self.copy(bucket_name, self.name,
- reduced_redundancy=True, preserve_acl=True,
- validate_dst_bucket=validate_dst_bucket)
- else:
- raise BotoClientError('Invalid storage class: %s' %
- new_storage_class)
-
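A sketch of a storage-class change, which under the hood performs the in-place copy described in the docstring (names hypothetical):

    import boto

    key = boto.connect_s3().get_bucket('example-bucket').get_key('example-object')
    # Rewrites the object onto itself with the new class, preserving
    # metadata and ACL info.
    key.change_storage_class('REDUCED_REDUNDANCY')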
- def copy(self, dst_bucket, dst_key, metadata=None,
- reduced_redundancy=False, preserve_acl=False,
- encrypt_key=False, validate_dst_bucket=True):
- """
- Copy this Key to another bucket.
-
- :type dst_bucket: string
- :param dst_bucket: The name of the destination bucket
-
- :type dst_key: string
- :param dst_key: The name of the destination key
-
- :type metadata: dict
- :param metadata: Metadata to be associated with new key. If
- metadata is supplied, it will replace the metadata of the
- source key being copied. If no metadata is supplied, the
- source key's metadata will be copied to the new key.
-
- :type reduced_redundancy: bool
- :param reduced_redundancy: If True, this will force the
- storage class of the new Key to be REDUCED_REDUNDANCY
- regardless of the storage class of the key being copied.
- The Reduced Redundancy Storage (RRS) feature of S3
- provides lower redundancy at lower storage cost.
-
- :type preserve_acl: bool
- :param preserve_acl: If True, the ACL from the source key will
- be copied to the destination key. If False, the
- destination key will have the default ACL. Note that
- preserving the ACL in the new key object will require two
- additional API calls to S3, one to retrieve the current
- ACL and one to set that ACL on the new object. If you
- don't care about the ACL, a value of False will be
- significantly more efficient.
-
- :type encrypt_key: bool
- :param encrypt_key: If True, the new copy of the object will
- be encrypted on the server-side by S3 and will be stored
- in an encrypted form while at rest in S3.
-
- :type validate_dst_bucket: bool
- :param validate_dst_bucket: If True, will validate the dst_bucket
- by using an extra list request.
-
- :rtype: :class:`boto.s3.key.Key` or subclass
- :returns: An instance of the newly created key object
- """
- dst_bucket = self.bucket.connection.lookup(dst_bucket,
- validate_dst_bucket)
- if reduced_redundancy:
- storage_class = 'REDUCED_REDUNDANCY'
- else:
- storage_class = self.storage_class
- return dst_bucket.copy_key(dst_key, self.bucket.name,
- self.name, metadata,
- storage_class=storage_class,
- preserve_acl=preserve_acl,
- encrypt_key=encrypt_key,
- src_version_id=self.version_id)
-
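A sketch of a server-side copy between two existing buckets; as the docstring notes, preserve_acl costs two extra API calls:

    import boto

    conn = boto.connect_s3()
    src = conn.get_bucket('source-bucket').get_key('report.csv')
    # Server-side copy; no data passes through the client.
    dst = src.copy('destination-bucket', 'backups/report.csv',
                   preserve_acl=True)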
- def startElement(self, name, attrs, connection):
- if name == 'Owner':
- self.owner = User(self)
- return self.owner
- else:
- return None
-
- def endElement(self, name, value, connection):
- if name == 'Key':
- self.name = value
- elif name == 'ETag':
- self.etag = value
- elif name == 'IsLatest':
- if value == 'true':
- self.is_latest = True
- else:
- self.is_latest = False
- elif name == 'LastModified':
- self.last_modified = value
- elif name == 'Size':
- self.size = int(value)
- elif name == 'StorageClass':
- self.storage_class = value
- elif name == 'Owner':
- pass
- elif name == 'VersionId':
- self.version_id = value
- else:
- setattr(self, name, value)
-
- def exists(self, headers=None):
- """
- Returns True if the key exists
-
- :rtype: bool
- :return: Whether the key exists on S3
- """
- return bool(self.bucket.lookup(self.name, headers=headers))
-
- def delete(self, headers=None):
- """
- Delete this key from S3
- """
- return self.bucket.delete_key(self.name, version_id=self.version_id,
- headers=headers)
-
- def get_metadata(self, name):
- return self.metadata.get(name)
-
- def set_metadata(self, name, value):
- # Ensure that metadata that is vital to signing is in the correct
- # case. Applies to ``Content-Type`` & ``Content-MD5``.
- if name.lower() == 'content-type':
- self.metadata['Content-Type'] = value
- elif name.lower() == 'content-md5':
- self.metadata['Content-MD5'] = value
- else:
- self.metadata[name] = value
- if name.lower() in Key.base_user_settable_fields:
- self.__dict__[name.lower().replace('-', '_')] = value
-
- def update_metadata(self, d):
- self.metadata.update(d)
-
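A sketch of the metadata helpers; keys outside base_user_settable_fields are assumed to be sent with the provider's metadata prefix (x-amz-meta- on S3), while signing-sensitive headers keep canonical case:

    import boto

    key = boto.connect_s3().get_bucket('example-bucket').new_key('example-object')
    key.set_metadata('Content-Type', 'application/json')
    # Custom metadata; transmitted as x-amz-meta-owner on S3.
    key.set_metadata('owner', 'data-team')
    key.update_metadata({'project': 'telemetry'})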
- # convenience methods for setting/getting ACL
- def set_acl(self, acl_str, headers=None):
- if self.bucket is not None:
- self.bucket.set_acl(acl_str, self.name, headers=headers)
-
- def get_acl(self, headers=None):
- if self.bucket is not None:
- return self.bucket.get_acl(self.name, headers=headers)
-
- def get_xml_acl(self, headers=None):
- if self.bucket is not None:
- return self.bucket.get_xml_acl(self.name, headers=headers)
-
- def set_xml_acl(self, acl_str, headers=None):
- if self.bucket is not None:
- return self.bucket.set_xml_acl(acl_str, self.name, headers=headers)
-
- def set_canned_acl(self, acl_str, headers=None):
- return self.bucket.set_canned_acl(acl_str, self.name, headers)
-
- def get_redirect(self):
- """Return the redirect location configured for this key.
-
- If no redirect is configured (via set_redirect), then None
- will be returned.
-
- """
- response = self.bucket.connection.make_request(
- 'HEAD', self.bucket.name, self.name)
- if response.status == 200:
- return response.getheader('x-amz-website-redirect-location')
- else:
- raise self.provider.storage_response_error(
- response.status, response.reason, response.read())
-
- def set_redirect(self, redirect_location, headers=None):
- """Configure this key to redirect to another location.
-
- When the bucket associated with this key is accessed from the website
- endpoint, a 301 redirect will be issued to the specified
- `redirect_location`.
-
- :type redirect_location: string
- :param redirect_location: The location to redirect.
-
- """
- if headers is None:
- headers = {}
- else:
- headers = headers.copy()
-
- headers['x-amz-website-redirect-location'] = redirect_location
- response = self.bucket.connection.make_request('PUT', self.bucket.name,
- self.name, headers)
- if response.status == 200:
- return True
- else:
- raise self.provider.storage_response_error(
- response.status, response.reason, response.read())
-
- def make_public(self, headers=None):
- return self.bucket.set_canned_acl('public-read', self.name, headers)
-
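The ACL convenience methods map onto canned ACL strings; a short sketch with hypothetical names:

    import boto

    key = boto.connect_s3().get_bucket('example-bucket').get_key('example-object')
    key.make_public()              # canned 'public-read' ACL
    key.set_canned_acl('private')  # back to owner-only access
    policy = key.get_acl()         # fetch the current ACL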
- def generate_url(self, expires_in, method='GET', headers=None,
- query_auth=True, force_http=False, response_headers=None,
- expires_in_absolute=False, version_id=None,
- policy=None, reduced_redundancy=False, encrypt_key=False):
- """
- Generate a URL to access this key.
-
- :type expires_in: int
- :param expires_in: How long the url is valid for, in seconds
-
- :type method: string
- :param method: The method to use for retrieving the file
- (default is GET)
-
- :type headers: dict
- :param headers: Any headers to pass along in the request
-
- :type query_auth: bool
- :param query_auth: If True (the default), the generated URL will
- include query string authentication parameters.
-
- :type force_http: bool
- :param force_http: If True, http will be used instead of https.
-
- :type response_headers: dict
- :param response_headers: A dictionary containing HTTP
- headers/values that will override any headers associated
- with the stored object in the response. See
- http://goo.gl/EWOPb for details.
-
- :type expires_in_absolute: bool
- :param expires_in_absolute: If True, ``expires_in`` is treated as
- an absolute expiration timestamp (seconds since the epoch)
- instead of a number of seconds from now.
-
- :type version_id: string
- :param version_id: The version_id of the object to GET. If specified
- this overrides any value in the key.
-
- :type policy: :class:`boto.s3.acl.CannedACLStrings`
- :param policy: A canned ACL policy that will be applied to the
- new key in S3.
-
- :type reduced_redundancy: bool
- :param reduced_redundancy: If True, this will set the storage
- class of the new Key to be REDUCED_REDUNDANCY. The Reduced
- Redundancy Storage (RRS) feature of S3 provides lower
- redundancy at lower storage cost.
-
- :type encrypt_key: bool
- :param encrypt_key: If True, the new copy of the object will
- be encrypted on the server-side by S3 and will be stored
- in an encrypted form while at rest in S3.
-
- :rtype: string
- :return: The URL to access the key
- """
- provider = self.bucket.connection.provider
- version_id = version_id or self.version_id
- if headers is None:
- headers = {}
- else:
- headers = headers.copy()
-
- # add headers accordingly (usually PUT case)
- if policy:
- headers[provider.acl_header] = policy
- if reduced_redundancy:
- self.storage_class = 'REDUCED_REDUNDANCY'
- if provider.storage_class_header:
- headers[provider.storage_class_header] = self.storage_class
- if encrypt_key:
- headers[provider.server_side_encryption_header] = 'AES256'
- headers = boto.utils.merge_meta(headers, self.metadata, provider)
-
- return self.bucket.connection.generate_url(expires_in, method,
- self.bucket.name, self.name,
- headers, query_auth,
- force_http,
- response_headers,
- expires_in_absolute,
- version_id)
-
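A sketch of URL generation, assuming the connection holds valid credentials:

    import boto

    key = boto.connect_s3().get_bucket('example-bucket').get_key('example-object')
    signed_url = key.generate_url(3600)  # signed, valid for one hour
    # Unsigned HTTP URL; only useful for a publicly readable object.
    public_url = key.generate_url(0, query_auth=False, force_http=True)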
- def send_file(self, fp, headers=None, cb=None, num_cb=10,
- query_args=None, chunked_transfer=False, size=None):
- """
- Upload a file to a key into a bucket on S3.
-
- :type fp: file
- :param fp: The file pointer to upload. The file pointer must
- point at the offset from which you wish to upload, i.e. if
- uploading the full file, it should point at the start of the
- file. Normally when a file is opened for reading, the fp will
- point at the first byte. See the size parameter below for
- more info.
-
- :type headers: dict
- :param headers: The headers to pass along with the PUT request
-
- :type num_cb: int
- :param num_cb: (optional) If a callback is specified with the
- cb parameter this parameter determines the granularity of
- the callback by defining the maximum number of times the
- callback will be called during the file
- transfer. Providing a negative integer will cause your
- callback to be called with each buffer read.
-
- :type query_args: string
- :param query_args: (optional) Arguments to pass in the query string.
-
- :type chunked_transfer: boolean
- :param chunked_transfer: (optional) If true, we use chunked
- Transfer-Encoding.
-
- :type size: int
- :param size: (optional) The Maximum number of bytes to read
- from the file pointer (fp). This is useful when uploading
- a file in multiple parts where you are splitting the file
- up into different ranges to be uploaded. If not specified,
- the default behaviour is to read all bytes from the file
- pointer. Fewer bytes may be available.
- """
- self._send_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb,
- query_args=query_args,
- chunked_transfer=chunked_transfer, size=size)
-
- def _send_file_internal(self, fp, headers=None, cb=None, num_cb=10,
- query_args=None, chunked_transfer=False, size=None,
- hash_algs=None):
- provider = self.bucket.connection.provider
- try:
- spos = fp.tell()
- except IOError:
- spos = None
- self.read_from_stream = False
-
- # If hash_algs is unset and the MD5 hasn't already been computed,
- # default to an MD5 hash_alg to hash the data on-the-fly.
- if hash_algs is None and not self.md5:
- hash_algs = {'md5': md5}
- digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})
-
- def sender(http_conn, method, path, data, headers):
- # This function is called repeatedly for temporary retries
- # so we must be sure the file pointer is pointing at the
- # start of the data.
- if spos is not None and spos != fp.tell():
- fp.seek(spos)
- elif spos is None and self.read_from_stream:
- # if seek is not supported, and we've read from this
- # stream already, then we need to abort retries to
- # avoid setting bad data.
- raise provider.storage_data_error(
- 'Cannot retry failed request. fp does not support seeking.')
-
- # If the caller explicitly specified host header, tell putrequest
- # not to add a second host header. Similarly for accept-encoding.
- skips = {}
- if boto.utils.find_matching_headers('host', headers):
- skips['skip_host'] = 1
- if boto.utils.find_matching_headers('accept-encoding', headers):
- skips['skip_accept_encoding'] = 1
- http_conn.putrequest(method, path, **skips)
- for key in headers:
- http_conn.putheader(key, headers[key])
- http_conn.endheaders()
-
- save_debug = self.bucket.connection.debug
- self.bucket.connection.debug = 0
- # If the debuglevel < 4 we don't want to show connection
- # payload, so turn off HTTP connection-level debug output (to
- # be restored below).
- # Use the getattr approach to allow this to work in AppEngine.
- if getattr(http_conn, 'debuglevel', 0) < 4:
- http_conn.set_debuglevel(0)
-
- data_len = 0
- if cb:
- if size:
- cb_size = size
- elif self.size:
- cb_size = self.size
- else:
- cb_size = 0
- if chunked_transfer and cb_size == 0:
- # For chunked Transfer, we call the cb for every 1MB
- # of data transferred, except when we know size.
- cb_count = (1024 * 1024) / self.BufferSize
- elif num_cb > 1:
- cb_count = int(
- math.ceil(cb_size / self.BufferSize / (num_cb - 1.0)))
- elif num_cb < 0:
- cb_count = -1
- else:
- cb_count = 0
- i = 0
- cb(data_len, cb_size)
-
- bytes_togo = size
- if bytes_togo and bytes_togo < self.BufferSize:
- chunk = fp.read(bytes_togo)
- else:
- chunk = fp.read(self.BufferSize)
-
- if not isinstance(chunk, bytes):
- chunk = chunk.encode('utf-8')
-
- if spos is None:
- # read at least something from a non-seekable fp.
- self.read_from_stream = True
- while chunk:
- chunk_len = len(chunk)
- data_len += chunk_len
- if chunked_transfer:
- http_conn.send('%x;\r\n' % chunk_len)
- http_conn.send(chunk)
- http_conn.send('\r\n')
- else:
- http_conn.send(chunk)
- for alg in digesters:
- digesters[alg].update(chunk)
- if bytes_togo:
- bytes_togo -= chunk_len
- if bytes_togo <= 0:
- break
- if cb:
- i += 1
- if i == cb_count or cb_count == -1:
- cb(data_len, cb_size)
- i = 0
- if bytes_togo and bytes_togo < self.BufferSize:
- chunk = fp.read(bytes_togo)
- else:
- chunk = fp.read(self.BufferSize)
-
- if not isinstance(chunk, bytes):
- chunk = chunk.encode('utf-8')
-
- self.size = data_len
-
- for alg in digesters:
- self.local_hashes[alg] = digesters[alg].digest()
-
- if chunked_transfer:
- http_conn.send('0\r\n')
- # http_conn.send("Content-MD5: %s\r\n" % self.base64md5)
- http_conn.send('\r\n')
-
- if cb and (cb_count <= 1 or i > 0) and data_len > 0:
- cb(data_len, cb_size)
-
- http_conn.set_debuglevel(save_debug)
- self.bucket.connection.debug = save_debug
- response = http_conn.getresponse()
- body = response.read()
-
- if not self.should_retry(response, chunked_transfer):
- raise provider.storage_response_error(
- response.status, response.reason, body)
-
- return response
-
- if not headers:
- headers = {}
- else:
- headers = headers.copy()
- # Overwrite user-supplied user-agent.
- for header in find_matching_headers('User-Agent', headers):
- del headers[header]
- headers['User-Agent'] = UserAgent
- # If storage_class is None, then a user has not explicitly requested
- # a storage class, so we can assume STANDARD here
- if self._storage_class not in [None, 'STANDARD']:
- headers[provider.storage_class_header] = self.storage_class
- if find_matching_headers('Content-Encoding', headers):
- self.content_encoding = merge_headers_by_name(
- 'Content-Encoding', headers)
- if find_matching_headers('Content-Language', headers):
- self.content_language = merge_headers_by_name(
- 'Content-Language', headers)
- content_type_headers = find_matching_headers('Content-Type', headers)
- if content_type_headers:
- # Some use cases need to suppress sending of the Content-Type
- # header and depend on the receiving server to set the content
- # type. This can be achieved by setting headers['Content-Type']
- # to None when calling this method.
- if (len(content_type_headers) == 1 and
- headers[content_type_headers[0]] is None):
- # Delete null Content-Type value to skip sending that header.
- del headers[content_type_headers[0]]
- else:
- self.content_type = merge_headers_by_name(
- 'Content-Type', headers)
- elif self.path:
- self.content_type = mimetypes.guess_type(self.path)[0]
- if self.content_type is None:
- self.content_type = self.DefaultContentType
- headers['Content-Type'] = self.content_type
- else:
- headers['Content-Type'] = self.content_type
- if self.base64md5:
- headers['Content-MD5'] = self.base64md5
- if chunked_transfer:
- headers['Transfer-Encoding'] = 'chunked'
- #if not self.base64md5:
- # headers['Trailer'] = "Content-MD5"
- else:
- headers['Content-Length'] = str(self.size)
- # This is terrible. We need a SHA256 of the body for SigV4, but to do
- # the chunked ``sender`` behavior above, the ``fp`` isn't available to
- # the auth mechanism (because closures). Detect if it's SigV4 & embellish
- # the headers while we can, before the auth calculations occur.
- if 'hmac-v4-s3' in self.bucket.connection._required_auth_capability():
- kwargs = {'fp': fp, 'hash_algorithm': hashlib.sha256}
- if size is not None:
- kwargs['size'] = size
- headers['_sha256'] = compute_hash(**kwargs)[0]
- headers['Expect'] = '100-Continue'
- headers = boto.utils.merge_meta(headers, self.metadata, provider)
- resp = self.bucket.connection.make_request(
- 'PUT',
- self.bucket.name,
- self.name,
- headers,
- sender=sender,
- query_args=query_args
- )
- self.handle_version_headers(resp, force=True)
- self.handle_addl_headers(resp.getheaders())
-
- def should_retry(self, response, chunked_transfer=False):
- provider = self.bucket.connection.provider
-
- if not chunked_transfer:
- if response.status in [500, 503]:
- # 500 & 503 can be plain retries.
- return True
-
- if response.getheader('location'):
- # If there's a redirect, plain retry.
- return True
-
- if 200 <= response.status <= 299:
- self.etag = response.getheader('etag')
- md5 = self.md5
- if isinstance(md5, bytes):
- md5 = md5.decode('utf-8')
-
- # If you use customer-provided encryption keys, the ETag value that
- # Amazon S3 returns in the response will not be the MD5 of the
- # object.
- server_side_encryption_customer_algorithm = response.getheader(
- 'x-amz-server-side-encryption-customer-algorithm', None)
- if server_side_encryption_customer_algorithm is None:
- if self.etag != '"%s"' % md5:
- raise provider.storage_data_error(
- 'ETag from S3 did not match computed MD5. '
- '%s vs. %s' % (self.etag, self.md5))
-
- return True
-
- if response.status == 400:
- # The 400 must be trapped so the retry handler can check to
- # see if it was a timeout.
- # If ``RequestTimeout`` is present, we'll retry. Otherwise, bomb
- # out.
- body = response.read()
- err = provider.storage_response_error(
- response.status,
- response.reason,
- body
- )
-
- if err.error_code in ['RequestTimeout']:
- raise PleaseRetryException(
- "Saw %s, retrying" % err.error_code,
- response=response
- )
-
- return False
-
- def compute_md5(self, fp, size=None):
- """
- :type fp: file
- :param fp: File pointer to the file to MD5 hash. The file
- pointer will be reset to the same position before the
- method returns.
-
- :type size: int
- :param size: (optional) The Maximum number of bytes to read
- from the file pointer (fp). This is useful when uploading
- a file in multiple parts where the file is being split
- in place into different parts. Fewer bytes may be available.
- """
- hex_digest, b64_digest, data_size = compute_md5(fp, size=size)
- # Returned values are MD5 hash, base64 encoded MD5 hash, and data size.
- # The internal implementation of compute_md5() needs to return the
- # data size but we don't want to return that value to the external
- # caller because it changes the class interface (i.e. it might
- # break some code) so we consume the third tuple value here and
- # return the remainder of the tuple to the caller, thereby preserving
- # the existing interface.
- self.size = data_size
- return (hex_digest, b64_digest)
-
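A sketch of precomputing the MD5 once and reusing it, as the md5 parameter of the set_contents_* methods suggests (names hypothetical):

    import boto

    key = boto.connect_s3().get_bucket('example-bucket').new_key('upload.bin')
    with open('upload.bin', 'rb') as fp:
        md5 = key.compute_md5(fp)  # (hexdigest, base64md5); fp is reset
        key.set_contents_from_file(fp, md5=md5)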
- def set_contents_from_stream(self, fp, headers=None, replace=True,
- cb=None, num_cb=10, policy=None,
- reduced_redundancy=False, query_args=None,
- size=None):
- """
- Store an object using the name of the Key object as the key in
- cloud and the contents of the data stream pointed to by 'fp' as
- the contents.
-
- The stream object is not seekable and the total size is not known.
- This means we can't specify the Content-Length and Content-MD5
- headers. So for huge uploads, the delay of calculating the MD5 is
- avoided, at the cost of being unable to verify the integrity of
- the uploaded data.
-
- :type fp: file
- :param fp: the file whose contents are to be uploaded
-
- :type headers: dict
- :param headers: additional HTTP headers to be sent with the
- PUT request.
-
- :type replace: bool
- :param replace: If this parameter is False, the method will first check
- to see if an object exists in the bucket with the same key. If it
- does, it won't overwrite it. The default value is True which will
- overwrite the object.
-
- :type cb: function
- :param cb: a callback function that will be called to report
- progress on the upload. The callback should accept two integer
- parameters, the first representing the number of bytes that have
- been successfully transmitted to GS and the second representing the
- total number of bytes that need to be transmitted.
-
- :type num_cb: int
- :param num_cb: (optional) If a callback is specified with the
- cb parameter, this parameter determines the granularity of
- the callback by defining the maximum number of times the
- callback will be called during the file transfer.
-
- :type policy: :class:`boto.gs.acl.CannedACLStrings`
- :param policy: A canned ACL policy that will be applied to the new key
- in GS.
-
- :type reduced_redundancy: bool
- :param reduced_redundancy: If True, this will set the storage
- class of the new Key to be REDUCED_REDUNDANCY. The Reduced
- Redundancy Storage (RRS) feature of S3 provides lower
- redundancy at lower storage cost.
-
- :type size: int
- :param size: (optional) The Maximum number of bytes to read from
- the file pointer (fp). This is useful when uploading a
- file in multiple parts where you are splitting the file up
- into different ranges to be uploaded. If not specified,
- the default behaviour is to read all bytes from the file
- pointer. Fewer bytes may be available.
- """
-
- provider = self.bucket.connection.provider
- if not provider.supports_chunked_transfer():
- raise BotoClientError('%s does not support chunked transfer'
- % provider.get_provider_name())
-
- # Name of the Object should be specified explicitly for Streams.
- if not self.name or self.name == '':
- raise BotoClientError('Cannot determine the destination '
- 'object name for the given stream')
-
- if headers is None:
- headers = {}
- if policy:
- headers[provider.acl_header] = policy
-
- if reduced_redundancy:
- self.storage_class = 'REDUCED_REDUNDANCY'
- if provider.storage_class_header:
- headers[provider.storage_class_header] = self.storage_class
-
- if self.bucket is not None:
- if not replace:
- if self.bucket.lookup(self.name):
- return
- self.send_file(fp, headers, cb, num_cb, query_args,
- chunked_transfer=True, size=size)
-
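A hedged sketch of a chunked upload from a non-seekable stream. This raises BotoClientError on providers without chunked-transfer support, so a Google Storage connection is assumed here:

    import sys
    import boto

    bucket = boto.connect_gs().get_bucket('example-bucket')
    key = bucket.new_key('stream-object')  # the name must be set explicitly
    # Chunked Transfer-Encoding: no Content-Length or Content-MD5 is sent.
    key.set_contents_from_stream(sys.stdin)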
- def set_contents_from_file(self, fp, headers=None, replace=True,
- cb=None, num_cb=10, policy=None, md5=None,
- reduced_redundancy=False, query_args=None,
- encrypt_key=False, size=None, rewind=False):
- """
- Store an object in S3 using the name of the Key object as the
- key in S3 and the contents of the file pointed to by 'fp' as the
- contents. The data is read from 'fp' from its current position until
- 'size' bytes have been read or EOF.
-
- :type fp: file
- :param fp: the file whose contents to upload
-
- :type headers: dict
- :param headers: Additional HTTP headers that will be sent with
- the PUT request.
-
- :type replace: bool
- :param replace: If this parameter is False, the method will
- first check to see if an object exists in the bucket with
- the same key. If it does, it won't overwrite it. The
- default value is True which will overwrite the object.
-
- :type cb: function
- :param cb: a callback function that will be called to report
- progress on the upload. The callback should accept two
- integer parameters, the first representing the number of
- bytes that have been successfully transmitted to S3 and
- the second representing the size of the to be transmitted
- object.
-
- :type num_cb: int
- :param num_cb: (optional) If a callback is specified with the
- cb parameter this parameter determines the granularity of
- the callback by defining the maximum number of times the
- callback will be called during the file transfer.
-
- :type policy: :class:`boto.s3.acl.CannedACLStrings`
- :param policy: A canned ACL policy that will be applied to the
- new key in S3.
-
- :type md5: A tuple containing the hexdigest version of the MD5
- checksum of the file as the first element and the
- Base64-encoded version of the plain checksum as the second
- element. This is the same format returned by the
- compute_md5 method.
- :param md5: If you need to compute the MD5 for any reason
- prior to upload, it's silly to have to do it twice so this
- param, if present, will be used as the MD5 values of the
- file. Otherwise, the checksum will be computed.
-
- :type reduced_redundancy: bool
- :param reduced_redundancy: If True, this will set the storage
- class of the new Key to be REDUCED_REDUNDANCY. The Reduced
- Redundancy Storage (RRS) feature of S3 provides lower
- redundancy at lower storage cost.
-
- :type encrypt_key: bool
- :param encrypt_key: If True, the new copy of the object will
- be encrypted on the server-side by S3 and will be stored
- in an encrypted form while at rest in S3.
-
- :type size: int
- :param size: (optional) The Maximum number of bytes to read
- from the file pointer (fp). This is useful when uploading
- a file in multiple parts where you are splitting the file
- up into different ranges to be uploaded. If not specified,
- the default behaviour is to read all bytes from the file
- pointer. Fewer bytes may be available.
-
- :type rewind: bool
- :param rewind: (optional) If True, the file pointer (fp) will
- be rewound to the start before any bytes are read from
- it. The default behaviour is False which reads from the
- current position of the file pointer (fp).
-
- :rtype: int
- :return: The number of bytes written to the key.
- """
- provider = self.bucket.connection.provider
- headers = headers or {}
- if policy:
- headers[provider.acl_header] = policy
- if encrypt_key:
- headers[provider.server_side_encryption_header] = 'AES256'
-
- if rewind:
- # caller requests reading from beginning of fp.
- fp.seek(0, os.SEEK_SET)
- else:
- # The following seek/tell/seek logic is intended
- # to detect applications using the older interface to
- # set_contents_from_file(), which automatically rewound the
- # file each time the Key was reused. This changed with commit
- # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads
- # split into multiple parts and uploaded in parallel, and at
- # the time of that commit this check was added because otherwise
- # older programs would get a success status and upload an empty
- object. Unfortunately, it's very inefficient for fp's implemented
- # by KeyFile (used, for example, by gsutil when copying between
- # providers). So, we skip the check for the KeyFile case.
- # TODO: At some point consider removing this seek/tell/seek
- # logic, after enough time has passed that it's unlikely any
- # programs remain that assume the older auto-rewind interface.
- if not isinstance(fp, KeyFile):
- spos = fp.tell()
- fp.seek(0, os.SEEK_END)
- if fp.tell() == spos:
- fp.seek(0, os.SEEK_SET)
- if fp.tell() != spos:
- # Raise an exception as this is likely a programming
- # error whereby there is data before the fp but nothing
- # after it.
- fp.seek(spos)
- raise AttributeError('fp is at EOF. Use rewind option '
- 'or seek() to data start.')
- # seek back to the correct position.
- fp.seek(spos)
-
- if reduced_redundancy:
- self.storage_class = 'REDUCED_REDUNDANCY'
- if provider.storage_class_header:
- headers[provider.storage_class_header] = self.storage_class
- # TODO - What if provider doesn't support reduced redundancy?
- # What if different providers provide different classes?
- if hasattr(fp, 'name'):
- self.path = fp.name
- if self.bucket is not None:
- if not md5 and provider.supports_chunked_transfer():
- # Defer md5 calculation to be done on the fly; we don't
- # know anything about the size yet.
- chunked_transfer = True
- self.size = None
- else:
- chunked_transfer = False
- if isinstance(fp, KeyFile):
- # Avoid EOF seek for KeyFile case as it's very inefficient.
- key = fp.getkey()
- size = key.size - fp.tell()
- self.size = size
- # At present both GCS and S3 use MD5 for the etag for
- # non-multipart-uploaded objects. If the etag is 32 hex
- # chars use it as an MD5, to avoid having to read the file
- # twice while transferring.
- if (re.match('^"[a-fA-F0-9]{32}"$', key.etag)):
- etag = key.etag.strip('"')
- md5 = (etag, base64.b64encode(binascii.unhexlify(etag)))
- if not md5:
- # compute_md5() and also set self.size to actual
- # size of the bytes read computing the md5.
- md5 = self.compute_md5(fp, size)
- # adjust size if required
- size = self.size
- elif size:
- self.size = size
- else:
- # If md5 is provided, we still need the size, so calculate
- # it based on the bytes to the end of the content.
- spos = fp.tell()
- fp.seek(0, os.SEEK_END)
- self.size = fp.tell() - spos
- fp.seek(spos)
- size = self.size
- self.md5 = md5[0]
- self.base64md5 = md5[1]
-
- if self.name is None:
- self.name = self.md5
- if not replace:
- if self.bucket.lookup(self.name):
- return
-
- self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb,
- query_args=query_args,
- chunked_transfer=chunked_transfer, size=size)
- # return number of bytes written.
- return self.size
-
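An upload sketch with a progress callback; per the num_cb docs, the callback fires at most num_cb times during the transfer:

    import boto

    key = boto.connect_s3().get_bucket('example-bucket').new_key('upload.bin')

    def progress(transmitted, total):
        print('%d of %d bytes sent' % (transmitted, total))

    with open('upload.bin', 'rb') as fp:
        key.set_contents_from_file(fp, cb=progress, num_cb=10)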
- def set_contents_from_filename(self, filename, headers=None, replace=True,
- cb=None, num_cb=10, policy=None, md5=None,
- reduced_redundancy=False,
- encrypt_key=False):
- """
- Store an object in S3 using the name of the Key object as the
- key in S3 and the contents of the file named by 'filename'.
- See set_contents_from_file method for details about the
- parameters.
-
- :type filename: string
- :param filename: The name of the file that you want to put onto S3
-
- :type headers: dict
- :param headers: Additional headers to pass along with the
- request to AWS.
-
- :type replace: bool
- :param replace: If True, replaces the contents of the file
- if it already exists.
-
- :type cb: function
- :param cb: a callback function that will be called to report
- progress on the upload. The callback should accept two
- integer parameters, the first representing the number of
- bytes that have been successfully transmitted to S3 and
- the second representing the size of the to be transmitted
- object.
-
- :type num_cb: int
- :param num_cb: (optional) If a callback is specified with the
- cb parameter this parameter determines the granularity of
- the callback by defining the maximum number of times the
- callback will be called during the file transfer.
-
- :type policy: :class:`boto.s3.acl.CannedACLStrings`
- :param policy: A canned ACL policy that will be applied to the
- new key in S3.
-
- :type md5: A tuple containing the hexdigest version of the MD5
- checksum of the file as the first element and the
- Base64-encoded version of the plain checksum as the second
- element. This is the same format returned by the
- compute_md5 method.
- :param md5: If you need to compute the MD5 for any reason
- prior to upload, it's silly to have to do it twice so this
- param, if present, will be used as the MD5 values of the
- file. Otherwise, the checksum will be computed.
-
- :type reduced_redundancy: bool
- :param reduced_redundancy: If True, this will set the storage
- class of the new Key to be REDUCED_REDUNDANCY. The Reduced
- Redundancy Storage (RRS) feature of S3 provides lower
- redundancy at lower storage cost.
-
- :type encrypt_key: bool
- :param encrypt_key: If True, the new copy of the object
- will be encrypted on the server-side by S3 and will be
- stored in an encrypted form while at rest in S3.
-
- :rtype: int
- :return: The number of bytes written to the key.
- """
- with open(filename, 'rb') as fp:
- return self.set_contents_from_file(fp, headers, replace, cb,
- num_cb, policy, md5,
- reduced_redundancy,
- encrypt_key=encrypt_key)
-
- def set_contents_from_string(self, string_data, headers=None, replace=True,
- cb=None, num_cb=10, policy=None, md5=None,
- reduced_redundancy=False,
- encrypt_key=False):
- """
- Store an object in S3 using the name of the Key object as the
- key in S3 and the string 's' as the contents.
- See set_contents_from_file method for details about the
- parameters.
-
- :type headers: dict
- :param headers: Additional headers to pass along with the
- request to AWS.
-
- :type replace: bool
- :param replace: If True, replaces the contents of the file if
- it already exists.
-
- :type cb: function
- :param cb: a callback function that will be called to report
- progress on the upload. The callback should accept two
- integer parameters, the first representing the number of
- bytes that have been successfully transmitted to S3 and
- the second representing the size of the to be transmitted
- object.
-
- :type num_cb: int
- :param num_cb: (optional) If a callback is specified with the
- cb parameter this parameter determines the granularity of
- the callback by defining the maximum number of times the
- callback will be called during the file transfer.
-
- :type policy: :class:`boto.s3.acl.CannedACLStrings`
- :param policy: A canned ACL policy that will be applied to the
- new key in S3.
-
- :type md5: A tuple containing the hexdigest version of the MD5
- checksum of the file as the first element and the
- Base64-encoded version of the plain checksum as the second
- element. This is the same format returned by the
- compute_md5 method.
- :param md5: If you need to compute the MD5 for any reason
- prior to upload, it's silly to have to do it twice so this
- param, if present, will be used as the MD5 values of the
- file. Otherwise, the checksum will be computed.
-
- :type reduced_redundancy: bool
- :param reduced_redundancy: If True, this will set the storage
- class of the new Key to be REDUCED_REDUNDANCY. The Reduced
- Redundancy Storage (RRS) feature of S3 provides lower
- redundancy at lower storage cost.
-
- :type encrypt_key: bool
- :param encrypt_key: If True, the new copy of the object will
- be encrypted on the server-side by S3 and will be stored
- in an encrypted form while at rest in S3.
- """
- if not isinstance(string_data, bytes):
- string_data = string_data.encode("utf-8")
- fp = BytesIO(string_data)
- r = self.set_contents_from_file(fp, headers, replace, cb, num_cb,
- policy, md5, reduced_redundancy,
- encrypt_key=encrypt_key)
- fp.close()
- return r
-
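A minimal sketch; the string is wrapped in a BytesIO and routed through set_contents_from_file as shown above:

    import boto

    bucket = boto.connect_s3().get_bucket('example-bucket')
    key = bucket.new_key('hello.txt')
    key.set_contents_from_string('hello world',
                                 headers={'Content-Type': 'text/plain'})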
- def get_file(self, fp, headers=None, cb=None, num_cb=10,
- torrent=False, version_id=None, override_num_retries=None,
- response_headers=None):
- """
- Retrieves a file from an S3 Key
-
- :type fp: file
- :param fp: File pointer to put the data into
-
- :type headers: dict
- :param headers: headers to send when retrieving the file
-
- :type cb: function
- :param cb: a callback function that will be called to report
- progress on the upload. The callback should accept two
- integer parameters, the first representing the number of
- bytes that have been successfully transmitted to S3 and
- the second representing the size of the to be transmitted
- object.
-
- :type num_cb: int
- :param num_cb: (optional) If a callback is specified with the
- cb parameter this parameter determines the granularity of
- the callback by defining the maximum number of times the
- callback will be called during the file transfer.
-
- :type torrent: bool
- :param torrent: Flag for whether to get a torrent for the file
-
- :type override_num_retries: int
- :param override_num_retries: If not None will override configured
- num_retries parameter for underlying GET.
-
- :type response_headers: dict
- :param response_headers: A dictionary containing HTTP
- headers/values that will override any headers associated
- with the stored object in the response. See
- http://goo.gl/EWOPb for details.
-
- :type version_id: str
- :param version_id: The ID of a particular version of the object.
- If this parameter is not supplied but the Key object has
- a ``version_id`` attribute, that value will be used when
- retrieving the object. You can set the Key object's
- ``version_id`` attribute to None to always grab the latest
- version from a version-enabled bucket.
- """
- self._get_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb,
- torrent=torrent, version_id=version_id,
- override_num_retries=override_num_retries,
- response_headers=response_headers,
- hash_algs=None,
- query_args=None)
-
- def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10,
- torrent=False, version_id=None, override_num_retries=None,
- response_headers=None, hash_algs=None, query_args=None):
- if headers is None:
- headers = {}
- save_debug = self.bucket.connection.debug
- if self.bucket.connection.debug == 1:
- self.bucket.connection.debug = 0
-
- query_args = query_args or []
- if torrent:
- query_args.append('torrent')
-
- if hash_algs is None and not torrent:
- hash_algs = {'md5': md5}
- digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})
-
- # If a version_id is passed in, use that. If not, check to see
- # if the Key object has an explicit version_id and, if so, use that.
- # Otherwise, don't pass a version_id query param.
- if version_id is None:
- version_id = self.version_id
- if version_id:
- query_args.append('versionId=%s' % version_id)
- if response_headers:
- for key in response_headers:
- query_args.append('%s=%s' % (
- key, urllib.parse.quote(response_headers[key])))
- query_args = '&'.join(query_args)
- self.open('r', headers, query_args=query_args,
- override_num_retries=override_num_retries)
-
- data_len = 0
- if cb:
- if self.size is None:
- cb_size = 0
- else:
- cb_size = self.size
- if self.size is None and num_cb != -1:
- # If size is not available due to chunked transfer for example,
- # we'll call the cb for every 1MB of data transferred.
- cb_count = (1024 * 1024) / self.BufferSize
- elif num_cb > 1:
- cb_count = int(math.ceil(cb_size/self.BufferSize/(num_cb-1.0)))
- elif num_cb < 0:
- cb_count = -1
- else:
- cb_count = 0
- i = 0
- cb(data_len, cb_size)
- try:
- for bytes in self:
- fp.write(bytes)
- data_len += len(bytes)
- for alg in digesters:
- digesters[alg].update(bytes)
- if cb:
- if cb_size > 0 and data_len >= cb_size:
- break
- i += 1
- if i == cb_count or cb_count == -1:
- cb(data_len, cb_size)
- i = 0
- except IOError as e:
- if e.errno == errno.ENOSPC:
- raise StorageDataError('Out of space for destination file '
- '%s' % fp.name)
- raise
- if cb and (cb_count <= 1 or i > 0) and data_len > 0:
- cb(data_len, cb_size)
- for alg in digesters:
- self.local_hashes[alg] = digesters[alg].digest()
- if self.size is None and not torrent and "Range" not in headers:
- self.size = data_len
- self.close()
- self.bucket.connection.debug = save_debug
-
- def get_torrent_file(self, fp, headers=None, cb=None, num_cb=10):
- """
- Get a torrent file (see get_file)
-
- :type fp: file
- :param fp: The file pointer of where to put the torrent
-
- :type headers: dict
- :param headers: Headers to be passed
-
- :type cb: function
- :param cb: a callback function that will be called to report
- progress on the upload. The callback should accept two
- integer parameters, the first representing the number of
- bytes that have been successfully transmitted to S3 and
- the second representing the size of the to be transmitted
- object.
-
- :type num_cb: int
- :param num_cb: (optional) If a callback is specified with the
- cb parameter this parameter determines the granularity of
- the callback by defining the maximum number of times the
- callback will be called during the file transfer.
-
- """
- return self.get_file(fp, headers, cb, num_cb, torrent=True)
-
- def get_contents_to_file(self, fp, headers=None,
- cb=None, num_cb=10,
- torrent=False,
- version_id=None,
- res_download_handler=None,
- response_headers=None):
- """
- Retrieve an object from S3 using the name of the Key object as the
- key in S3. Write the contents of the object to the file pointed
- to by 'fp'.
-
- :type fp: file-like object
- :param fp: the file object to write the object's contents to
-
- :type headers: dict
- :param headers: additional HTTP headers that will be sent with
- the GET request.
-
- :type cb: function
- :param cb: a callback function that will be called to report
- progress on the upload. The callback should accept two
- integer parameters, the first representing the number of
- bytes that have been successfully transmitted to S3 and
- the second representing the size of the to be transmitted
- object.
-
- :type num_cb: int
- :param num_cb: (optional) If a callback is specified with the
- cb parameter this parameter determines the granularity of
- the callback by defining the maximum number of times the
- callback will be called during the file transfer.
-
- :type torrent: bool
- :param torrent: If True, returns the contents of a torrent
- file as a string.
-
- :type res_download_handler: ResumableDownloadHandler
- :param res_download_handler: If provided, this handler will
- perform the download.
-
- :type response_headers: dict
- :param response_headers: A dictionary containing HTTP
- headers/values that will override any headers associated
- with the stored object in the response. See
- http://goo.gl/EWOPb for details.
-
- :type version_id: str
- :param version_id: The ID of a particular version of the object.
- If this parameter is not supplied but the Key object has
- a ``version_id`` attribute, that value will be used when
- retrieving the object. You can set the Key object's
- ``version_id`` attribute to None to always grab the latest
- version from a version-enabled bucket.
- """
- if self.bucket is not None:
- if res_download_handler:
- res_download_handler.get_file(self, fp, headers, cb, num_cb,
- torrent=torrent,
- version_id=version_id)
- else:
- self.get_file(fp, headers, cb, num_cb, torrent=torrent,
- version_id=version_id,
- response_headers=response_headers)
-
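A small usage sketch with a progress callback, reusing the hypothetical key object from the earlier sketch (the callback signature follows the docstring above):

def progress(transmitted, total):
    # total may be 0 if the object's size is not known up front
    print('%d of %d bytes' % (transmitted, total))

with open('/tmp/report.csv', 'wb') as fp:
    key.get_contents_to_file(fp, cb=progress, num_cb=20)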
- def get_contents_to_filename(self, filename, headers=None,
- cb=None, num_cb=10,
- torrent=False,
- version_id=None,
- res_download_handler=None,
- response_headers=None):
- """
- Retrieve an object from S3 using the name of the Key object as the
- key in S3. Store contents of the object to a file named by 'filename'.
- See get_contents_to_file method for details about the
- parameters.
-
- :type filename: string
- :param filename: The filename of where to put the file contents
-
- :type headers: dict
- :param headers: Any additional headers to send in the request
-
-        :type cb: function
-        :param cb: a callback function that will be called to report
-            progress on the download. The callback should accept two
-            integer parameters, the first representing the number of
-            bytes that have been successfully transferred from S3 and
-            the second representing the total size of the object.
-
- :type num_cb: int
- :param num_cb: (optional) If a callback is specified with the
- cb parameter this parameter determines the granularity of
- the callback by defining the maximum number of times the
- callback will be called during the file transfer.
-
- :type torrent: bool
-        :param torrent: If True, retrieve the torrent file for the
-            key instead of the key's contents.
-
-        :type res_download_handler: ResumableDownloadHandler
- :param res_download_handler: If provided, this handler will
- perform the download.
-
- :type response_headers: dict
- :param response_headers: A dictionary containing HTTP
- headers/values that will override any headers associated
- with the stored object in the response. See
- http://goo.gl/EWOPb for details.
-
- :type version_id: str
- :param version_id: The ID of a particular version of the object.
- If this parameter is not supplied but the Key object has
- a ``version_id`` attribute, that value will be used when
- retrieving the object. You can set the Key object's
- ``version_id`` attribute to None to always grab the latest
- version from a version-enabled bucket.
- """
-        try:
-            with open(filename, 'wb') as fp:
-                self.get_contents_to_file(fp, headers, cb, num_cb,
-                                          torrent=torrent,
-                                          version_id=version_id,
-                                          res_download_handler=res_download_handler,
-                                          response_headers=response_headers)
-        except Exception:
-            # Remove any partially written file, ignoring errors if the
-            # file was never created (e.g. open() itself failed).
-            try:
-                os.remove(filename)
-            except OSError:
-                pass
-            raise
- # if last_modified date was sent from s3, try to set file's timestamp
- if self.last_modified is not None:
- try:
- modified_tuple = email.utils.parsedate_tz(self.last_modified)
- modified_stamp = int(email.utils.mktime_tz(modified_tuple))
-                os.utime(filename, (modified_stamp, modified_stamp))
- except Exception:
- pass
-
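The filename variant wraps get_contents_to_file, removes the partially written file on failure, and stamps the local file with the object's Last-Modified time when S3 returns one; a one-line sketch (the path is hypothetical):

key.get_contents_to_filename('/tmp/report.csv')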
- def get_contents_as_string(self, headers=None,
- cb=None, num_cb=10,
- torrent=False,
- version_id=None,
- response_headers=None, encoding=None):
- """
- Retrieve an object from S3 using the name of the Key object as the
- key in S3. Return the contents of the object as a string.
- See get_contents_to_file method for details about the
- parameters.
-
- :type headers: dict
- :param headers: Any additional headers to send in the request
-
-        :type cb: function
-        :param cb: a callback function that will be called to report
-            progress on the download. The callback should accept two
-            integer parameters, the first representing the number of
-            bytes that have been successfully transferred from S3 and
-            the second representing the total size of the object.
-
-        :type num_cb: int
- :param num_cb: (optional) If a callback is specified with the
- cb parameter this parameter determines the granularity of
- the callback by defining the maximum number of times the
- callback will be called during the file transfer.
-
- :type torrent: bool
- :param torrent: If True, returns the contents of a torrent file
- as a string.
-
- :type response_headers: dict
- :param response_headers: A dictionary containing HTTP
- headers/values that will override any headers associated
- with the stored object in the response. See
- http://goo.gl/EWOPb for details.
-
- :type version_id: str
- :param version_id: The ID of a particular version of the object.
- If this parameter is not supplied but the Key object has
- a ``version_id`` attribute, that value will be used when
- retrieving the object. You can set the Key object's
- ``version_id`` attribute to None to always grab the latest
- version from a version-enabled bucket.
-
- :type encoding: str
- :param encoding: The text encoding to use, such as ``utf-8``
- or ``iso-8859-1``. If set, then a string will be returned.
- Defaults to ``None`` and returns bytes.
-
- :rtype: bytes or str
- :returns: The contents of the file as bytes or a string
- """
- fp = BytesIO()
- self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent,
- version_id=version_id,
- response_headers=response_headers)
- value = fp.getvalue()
-
- if encoding is not None:
- value = value.decode(encoding)
-
- return value
-
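Note that the return type depends on the encoding argument: bytes by default, str when an encoding is given. A quick sketch with the hypothetical key from above:

raw = key.get_contents_as_string()                    # bytes
text = key.get_contents_as_string(encoding='utf-8')   # str
assert isinstance(raw, bytes)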
- def add_email_grant(self, permission, email_address, headers=None):
- """
- Convenience method that provides a quick way to add an email grant
- to a key. This method retrieves the current ACL, creates a new
- grant based on the parameters passed in, adds that grant to the ACL
- and then PUT's the new ACL back to S3.
-
- :type permission: string
- :param permission: The permission being granted. Should be one of:
- (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL).
-
- :type email_address: string
-        :param email_address: The email address associated with the AWS
-            account you are granting the permission to.
- """
- policy = self.get_acl(headers=headers)
- policy.acl.add_email_grant(permission, email_address)
- self.set_acl(policy, headers=headers)
-
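Because this is a read-modify-write of the whole ACL (GET the policy, append a grant, PUT it back), concurrent ACL edits on the same key can race. A usage sketch with a hypothetical address:

key.add_email_grant('READ', 'collaborator@example.com')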
- def add_user_grant(self, permission, user_id, headers=None,
- display_name=None):
- """
- Convenience method that provides a quick way to add a canonical
- user grant to a key. This method retrieves the current ACL,
- creates a new grant based on the parameters passed in, adds that
- grant to the ACL and then PUT's the new ACL back to S3.
-
- :type permission: string
- :param permission: The permission being granted. Should be one of:
- (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL).
-
- :type user_id: string
- :param user_id: The canonical user id associated with the AWS
-            account you are granting the permission to.
-
- :type display_name: string
-        :param display_name: An optional string containing the user's
- Display Name. Only required on Walrus.
- """
- policy = self.get_acl(headers=headers)
- policy.acl.add_user_grant(permission, user_id,
- display_name=display_name)
- self.set_acl(policy, headers=headers)
-
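The canonical-user variant works the same way; the id below is a placeholder rather than a real canonical user id:

key.add_user_grant('FULL_CONTROL',
                   'a1b2c3' + '0' * 58,   # hypothetical 64-char canonical id
                   display_name='alice')  # only needed on Walrus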
- def _normalize_metadata(self, metadata):
-        if isinstance(metadata, set):
- norm_metadata = set()
- for k in metadata:
- norm_metadata.add(k.lower())
- else:
- norm_metadata = {}
- for k in metadata:
- norm_metadata[k.lower()] = metadata[k]
- return norm_metadata
-
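The helper simply lower-cases keys, accepting either a set (used for metadata deletions) or a dict (used for additions); a sketch of its behavior on both input types:

key._normalize_metadata({'Content-Language'})  # -> {'content-language'}
key._normalize_metadata({'My-Header': 'v1'})   # -> {'my-header': 'v1'}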
- def _get_remote_metadata(self, headers=None):
- """
- Extracts metadata from existing URI into a dict, so we can
- overwrite/delete from it to form the new set of metadata to apply to a
- key.
- """
- metadata = {}
- for underscore_name in self._underscore_base_user_settable_fields:
- if hasattr(self, underscore_name):
- value = getattr(self, underscore_name)
- if value:
- # Generate HTTP field name corresponding to "_" named field.
- field_name = underscore_name.replace('_', '-')
- metadata[field_name.lower()] = value
- # self.metadata contains custom metadata, which are all user-settable.
- prefix = self.provider.metadata_prefix
- for underscore_name in self.metadata:
- field_name = underscore_name.replace('_', '-')
- metadata['%s%s' % (prefix, field_name.lower())] = (
- self.metadata[underscore_name])
- return metadata
-
- def set_remote_metadata(self, metadata_plus, metadata_minus, preserve_acl,
- headers=None):
- metadata_plus = self._normalize_metadata(metadata_plus)
- metadata_minus = self._normalize_metadata(metadata_minus)
- metadata = self._get_remote_metadata()
- metadata.update(metadata_plus)
- for h in metadata_minus:
- if h in metadata:
- del metadata[h]
- src_bucket = self.bucket
- # Boto prepends the meta prefix when adding headers, so strip prefix in
- # metadata before sending back in to copy_key() call.
- rewritten_metadata = {}
- for h in metadata:
- if (h.startswith('x-goog-meta-') or h.startswith('x-amz-meta-')):
- rewritten_h = (h.replace('x-goog-meta-', '')
- .replace('x-amz-meta-', ''))
- else:
- rewritten_h = h
- rewritten_metadata[rewritten_h] = metadata[h]
- metadata = rewritten_metadata
- src_bucket.copy_key(self.name, self.bucket.name, self.name,
- metadata=metadata, preserve_acl=preserve_acl,
- headers=headers)
-
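Since S3 metadata can only be replaced wholesale, the method rewrites the object via copy_key onto itself with the merged header set. A sketch that adds one custom header and drops another (the header names are illustrative):

key.set_remote_metadata(
    metadata_plus={'x-amz-meta-owner': 'data-team'},  # add or overwrite
    metadata_minus={'x-amz-meta-stale'},              # delete if present
    preserve_acl=True)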
- def restore(self, days, headers=None):
- """Restore an object from an archive.
-
- :type days: int
- :param days: The lifetime of the restored object (must
- be at least 1 day). If the object is already restored
- then this parameter can be used to readjust the lifetime
- of the restored object. In this case, the days
- param is with respect to the initial time of the request.
- If the object has not been restored, this param is with
- respect to the completion time of the request.
-
- """
- response = self.bucket.connection.make_request(
- 'POST', self.bucket.name, self.name,
- data=self.RestoreBody % days,
- headers=headers, query_args='restore')
- if response.status not in (200, 202):
- provider = self.bucket.connection.provider
- raise provider.storage_response_error(response.status,
- response.reason,
- response.read())
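The restore request is asynchronous: S3 answers 200 or 202, and the object only becomes readable once the restore completes, so callers typically re-check the key (e.g. its ongoing_restore flag) later. A one-line sketch:

key.restore(days=7)  # keep the restored copy readable for a week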
