| Index: third_party/gsutil/boto/boto/glacier/vault.py
|
| diff --git a/third_party/gsutil/boto/boto/glacier/vault.py b/third_party/gsutil/boto/boto/glacier/vault.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..e037adc700ab7d72594d060f436ec891253daf12
|
| --- /dev/null
|
| +++ b/third_party/gsutil/boto/boto/glacier/vault.py
|
| @@ -0,0 +1,387 @@
|
| +# -*- coding: utf-8 -*-
|
| +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/
|
| +# Copyright (c) 2012 Robie Basak <robie@justgohome.co.uk>
|
| +#
|
| +# Permission is hereby granted, free of charge, to any person obtaining a
|
| +# copy of this software and associated documentation files (the
|
| +# "Software"), to deal in the Software without restriction, including
|
| +# without limitation the rights to use, copy, modify, merge, publish, dis-
|
| +# tribute, sublicense, and/or sell copies of the Software, and to permit
|
| +# persons to whom the Software is furnished to do so, subject to the fol-
|
| +# lowing conditions:
|
| +#
|
| +# The above copyright notice and this permission notice shall be included
|
| +# in all copies or substantial portions of the Software.
|
| +#
|
| +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
| +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
|
| +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
| +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
| +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
| +# IN THE SOFTWARE.
|
| +#
|
| +from __future__ import with_statement
|
| +from .exceptions import UploadArchiveError
|
| +from .job import Job
|
| +from .writer import compute_hashes_from_fileobj, resume_file_upload, Writer
|
| +from .concurrent import ConcurrentUploader
|
| +from .utils import minimum_part_size, DEFAULT_PART_SIZE
|
| +import os.path
|
| +
|
| +
|
| +_MEGABYTE = 1024 * 1024
|
| +_GIGABYTE = 1024 * _MEGABYTE
|
| +
|
| +MAXIMUM_ARCHIVE_SIZE = 10000 * 4 * _GIGABYTE
|
| +MAXIMUM_NUMBER_OF_PARTS = 10000
|
| +
|
| +
|
class Vault(object):
    """A single Amazon Glacier vault.

    Wraps a Layer1 connection (``self.layer1``) and exposes vault-level
    operations: archive upload (single-shot, multipart, concurrent and
    resumed), archive retrieval/inventory jobs, and job listing.
    """

    # Part size used for multipart uploads unless the caller overrides it.
    DefaultPartSize = DEFAULT_PART_SIZE
    # Files larger than this (100MB) are sent through the multipart upload
    # machinery instead of a single upload_archive request.
    SingleOperationThreshold = 100 * _MEGABYTE

    # (response key, attribute name, default) triples; __init__ copies the
    # matching DescribeVault response values onto the instance, or the
    # defaults when no response data is supplied.
    ResponseDataElements = (('VaultName', 'name', None),
                            ('VaultARN', 'arn', None),
                            ('CreationDate', 'creation_date', None),
                            ('LastInventoryDate', 'last_inventory_date', None),
                            ('SizeInBytes', 'size', 0),
                            ('NumberOfArchives', 'number_of_archives', 0))
|
| +
|
| + def __init__(self, layer1, response_data=None):
|
| + self.layer1 = layer1
|
| + if response_data:
|
| + for response_name, attr_name, default in self.ResponseDataElements:
|
| + value = response_data[response_name]
|
| + if isinstance(value, unicode):
|
| + value = value.encode('utf8')
|
| + setattr(self, attr_name, value)
|
| + else:
|
| + for response_name, attr_name, default in self.ResponseDataElements:
|
| + setattr(self, attr_name, default)
|
| +
|
    def __repr__(self):
        # Identify the vault by its ARN for logs and interactive sessions.
        return 'Vault("%s")' % self.arn
|
| +
|
    def delete(self):
        """
        Delete this vault. WARNING! Destructive; the request is forwarded
        directly to layer1.delete_vault.
        """
        self.layer1.delete_vault(self.name)
|
| +
|
| + def upload_archive(self, filename, description=None):
|
| + """
|
| + Adds an archive to a vault. For archives greater than 100MB the
|
| + multipart upload will be used.
|
| +
|
| + :type file: str
|
| + :param file: A filename to upload
|
| +
|
| + :type description: str
|
| + :param description: An optional description for the archive.
|
| +
|
| + :rtype: str
|
| + :return: The archive id of the newly created archive
|
| + """
|
| + if os.path.getsize(filename) > self.SingleOperationThreshold:
|
| + return self.create_archive_from_file(filename, description=description)
|
| + return self._upload_archive_single_operation(filename, description)
|
| +
|
| + def _upload_archive_single_operation(self, filename, description):
|
| + """
|
| + Adds an archive to a vault in a single operation. It's recommended for
|
| + archives less than 100MB
|
| +
|
| + :type file: str
|
| + :param file: A filename to upload
|
| +
|
| + :type description: str
|
| + :param description: A description for the archive.
|
| +
|
| + :rtype: str
|
| + :return: The archive id of the newly created archive
|
| + """
|
| + with open(filename, 'rb') as fileobj:
|
| + linear_hash, tree_hash = compute_hashes_from_fileobj(fileobj)
|
| + fileobj.seek(0)
|
| + response = self.layer1.upload_archive(self.name, fileobj,
|
| + linear_hash, tree_hash,
|
| + description)
|
| + return response['ArchiveId']
|
| +
|
| + def create_archive_writer(self, part_size=DefaultPartSize,
|
| + description=None):
|
| + """
|
| + Create a new archive and begin a multi-part upload to it.
|
| + Returns a file-like object to which the data for the archive
|
| + can be written. Once all the data is written the file-like
|
| + object should be closed, you can then call the get_archive_id
|
| + method on it to get the ID of the created archive.
|
| +
|
| + :type part_size: int
|
| + :param part_size: The part size for the multipart upload.
|
| +
|
| + :type description: str
|
| + :param description: An optional description for the archive.
|
| +
|
| + :rtype: :class:`boto.glacier.writer.Writer`
|
| + :return: A Writer object that to which the archive data
|
| + should be written.
|
| + """
|
| + response = self.layer1.initiate_multipart_upload(self.name,
|
| + part_size,
|
| + description)
|
| + return Writer(self, response['UploadId'], part_size=part_size)
|
| +
|
| + def create_archive_from_file(self, filename=None, file_obj=None,
|
| + description=None, upload_id_callback=None):
|
| + """
|
| + Create a new archive and upload the data from the given file
|
| + or file-like object.
|
| +
|
| + :type filename: str
|
| + :param filename: A filename to upload
|
| +
|
| + :type file_obj: file
|
| + :param file_obj: A file-like object to upload
|
| +
|
| + :type description: str
|
| + :param description: An optional description for the archive.
|
| +
|
| + :type upload_id_callback: function
|
| + :param upload_id_callback: if set, call with the upload_id as the
|
| + only parameter when it becomes known, to enable future calls
|
| + to resume_archive_from_file in case resume is needed.
|
| +
|
| + :rtype: str
|
| + :return: The archive id of the newly created archive
|
| + """
|
| + part_size = self.DefaultPartSize
|
| + if not file_obj:
|
| + file_size = os.path.getsize(filename)
|
| + try:
|
| + min_part_size = minimum_part_size(file_size,
|
| + self.DefaultPartSize)
|
| + except ValueError:
|
| + raise UploadArchiveError("File size of %s bytes exceeds "
|
| + "40,000 GB archive limit of Glacier.")
|
| + file_obj = open(filename, "rb")
|
| + writer = self.create_archive_writer(
|
| + description=description,
|
| + part_size=part_size)
|
| + if upload_id_callback:
|
| + upload_id_callback(writer.upload_id)
|
| + while True:
|
| + data = file_obj.read(part_size)
|
| + if not data:
|
| + break
|
| + writer.write(data)
|
| + writer.close()
|
| + return writer.get_archive_id()
|
| +
|
| + @staticmethod
|
| + def _range_string_to_part_index(range_string, part_size):
|
| + start, inside_end = [int(value) for value in range_string.split('-')]
|
| + end = inside_end + 1
|
| + length = end - start
|
| + if length == part_size + 1:
|
| + # Off-by-one bug in Amazon's Glacier implementation,
|
| + # see: https://forums.aws.amazon.com/thread.jspa?threadID=106866
|
| + # Workaround: since part_size is too big by one byte, adjust it
|
| + end -= 1
|
| + inside_end -= 1
|
| + length -= 1
|
| + assert not (start % part_size), (
|
| + "upload part start byte is not on a part boundary")
|
| + assert (length <= part_size), "upload part is bigger than part size"
|
| + return start // part_size
|
| +
|
| + def resume_archive_from_file(self, upload_id, filename=None,
|
| + file_obj=None):
|
| + """Resume upload of a file already part-uploaded to Glacier.
|
| +
|
| + The resumption of an upload where the part-uploaded section is empty
|
| + is a valid degenerate case that this function can handle.
|
| +
|
| + One and only one of filename or file_obj must be specified.
|
| +
|
| + :type upload_id: str
|
| + :param upload_id: existing Glacier upload id of upload being resumed.
|
| +
|
| + :type filename: str
|
| + :param filename: file to open for resume
|
| +
|
| + :type fobj: file
|
| + :param fobj: file-like object containing local data to resume. This
|
| + must read from the start of the entire upload, not just from the
|
| + point being resumed. Use fobj.seek(0) to achieve this if necessary.
|
| +
|
| + :rtype: str
|
| + :return: The archive id of the newly created archive
|
| +
|
| + """
|
| + part_list_response = self.list_all_parts(upload_id)
|
| + part_size = part_list_response['PartSizeInBytes']
|
| +
|
| + part_hash_map = {}
|
| + for part_desc in part_list_response['Parts']:
|
| + part_index = self._range_string_to_part_index(
|
| + part_desc['RangeInBytes'], part_size)
|
| + part_tree_hash = part_desc['SHA256TreeHash'].decode('hex')
|
| + part_hash_map[part_index] = part_tree_hash
|
| +
|
| + if not file_obj:
|
| + file_obj = open(filename, "rb")
|
| +
|
| + return resume_file_upload(
|
| + self, upload_id, part_size, file_obj, part_hash_map)
|
| +
|
| + def concurrent_create_archive_from_file(self, filename, description):
|
| + """
|
| + Create a new archive from a file and upload the given
|
| + file.
|
| +
|
| + This is a convenience method around the
|
| + :class:`boto.glacier.concurrent.ConcurrentUploader`
|
| + class. This method will perform a multipart upload
|
| + and upload the parts of the file concurrently.
|
| +
|
| + :type filename: str
|
| + :param filename: A filename to upload
|
| +
|
| + :raises: `boto.glacier.exception.UploadArchiveError` is an error
|
| + occurs during the upload process.
|
| +
|
| + :rtype: str
|
| + :return: The archive id of the newly created archive
|
| +
|
| + """
|
| + uploader = ConcurrentUploader(self.layer1, self.name)
|
| + archive_id = uploader.upload(filename, description)
|
| + return archive_id
|
| +
|
| + def retrieve_archive(self, archive_id, sns_topic=None,
|
| + description=None):
|
| + """
|
| + Initiate a archive retrieval job to download the data from an
|
| + archive. You will need to wait for the notification from
|
| + Amazon (via SNS) before you can actually download the data,
|
| + this takes around 4 hours.
|
| +
|
| + :type archive_id: str
|
| + :param archive_id: The id of the archive
|
| +
|
| + :type description: str
|
| + :param description: An optional description for the job.
|
| +
|
| + :type sns_topic: str
|
| + :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
|
| + sends notification when the job is completed and the output
|
| + is ready for you to download.
|
| +
|
| + :rtype: :class:`boto.glacier.job.Job`
|
| + :return: A Job object representing the retrieval job.
|
| + """
|
| + job_data = {'Type': 'archive-retrieval',
|
| + 'ArchiveId': archive_id}
|
| + if sns_topic is not None:
|
| + job_data['SNSTopic'] = sns_topic
|
| + if description is not None:
|
| + job_data['Description'] = description
|
| +
|
| + response = self.layer1.initiate_job(self.name, job_data)
|
| + return self.get_job(response['JobId'])
|
| +
|
    def retrieve_inventory(self, sns_topic=None,
                           description=None):
        """
        Initiate an inventory retrieval job to list the items in the
        vault. You will need to wait for the notification from
        Amazon (via SNS) before you can actually download the data,
        this takes around 4 hours.

        :type description: str
        :param description: An optional description for the job.

        :type sns_topic: str
        :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
            sends notification when the job is completed and the output
            is ready for you to download.

        :rtype: str
        :return: The id of the initiated inventory-retrieval job. Note:
            unlike retrieve_archive, this returns the raw job id rather
            than a :class:`boto.glacier.job.Job` object; pass the id to
            get_job to obtain one.
        """
        job_data = {'Type': 'inventory-retrieval'}
        if sns_topic is not None:
            job_data['SNSTopic'] = sns_topic
        if description is not None:
            job_data['Description'] = description

        response = self.layer1.initiate_job(self.name, job_data)
        return response['JobId']
|
| +
|
    def delete_archive(self, archive_id):
        """
        This operation deletes an archive from the vault.

        :type archive_id: str
        :param archive_id: The ID for the archive to be deleted.

        :return: The raw response from layer1.delete_archive.
        """
        return self.layer1.delete_archive(self.name, archive_id)
|
| +
|
| + def get_job(self, job_id):
|
| + """
|
| + Get an object representing a job in progress.
|
| +
|
| + :type job_id: str
|
| + :param job_id: The ID of the job
|
| +
|
| + :rtype: :class:`boto.glacier.job.Job`
|
| + :return: A Job object representing the job.
|
| + """
|
| + response_data = self.layer1.describe_job(self.name, job_id)
|
| + return Job(self, response_data)
|
| +
|
| + def list_jobs(self, completed=None, status_code=None):
|
| + """
|
| + Return a list of Job objects related to this vault.
|
| +
|
| + :type completed: boolean
|
| + :param completed: Specifies the state of the jobs to return.
|
| + If a value of True is passed, only completed jobs will
|
| + be returned. If a value of False is passed, only
|
| + uncompleted jobs will be returned. If no value is
|
| + passed, all jobs will be returned.
|
| +
|
| + :type status_code: string
|
| + :param status_code: Specifies the type of job status to return.
|
| + Valid values are: InProgress|Succeeded|Failed. If not
|
| + specified, jobs with all status codes are returned.
|
| +
|
| + :rtype: list of :class:`boto.glacier.job.Job`
|
| + :return: A list of Job objects related to this vault.
|
| + """
|
| + response_data = self.layer1.list_jobs(self.name, completed,
|
| + status_code)
|
| + return [Job(self, jd) for jd in response_data['JobList']]
|
| +
|
| + def list_all_parts(self, upload_id):
|
| + """Automatically make and combine multiple calls to list_parts.
|
| +
|
| + Call list_parts as necessary, combining the results in case multiple
|
| + calls were required to get data on all available parts.
|
| +
|
| + """
|
| + result = self.layer1.list_parts(self.name, upload_id)
|
| + marker = result['Marker']
|
| + while marker:
|
| + additional_result = self.layer1.list_parts(
|
| + self.name, upload_id, marker=marker)
|
| + result['Parts'].extend(additional_result['Parts'])
|
| + marker = additional_result['Marker']
|
| + # The marker makes no sense in an unpaginated result, and clearing it
|
| + # makes testing easier. This also has the nice property that the result
|
| + # is a normal (but expanded) response.
|
| + result['Marker'] = None
|
| + return result
|
|
|