| Index: third_party/gsutil/gslib/commands/compose.py
|
| diff --git a/third_party/gsutil/gslib/commands/compose.py b/third_party/gsutil/gslib/commands/compose.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..e33261fe8ce168118ab8fdc50e070a3cc306c023
|
| --- /dev/null
|
| +++ b/third_party/gsutil/gslib/commands/compose.py
|
| @@ -0,0 +1,167 @@
|
| +# -*- coding: utf-8 -*-
|
| +# Copyright 2013 Google Inc. All Rights Reserved.
|
| +#
|
| +# Licensed under the Apache License, Version 2.0 (the "License");
|
| +# you may not use this file except in compliance with the License.
|
| +# You may obtain a copy of the License at
|
| +#
|
| +# http://www.apache.org/licenses/LICENSE-2.0
|
| +#
|
| +# Unless required by applicable law or agreed to in writing, software
|
| +# distributed under the License is distributed on an "AS IS" BASIS,
|
| +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| +# See the License for the specific language governing permissions and
|
| +# limitations under the License.
|
| +"""Implementation of compose command for Google Cloud Storage."""
|
| +
|
| +from __future__ import absolute_import
|
| +
|
| +from gslib.bucket_listing_ref import BucketListingObject
|
| +from gslib.command import Command
|
| +from gslib.command_argument import CommandArgument
|
| +from gslib.cs_api_map import ApiSelector
|
| +from gslib.exception import CommandException
|
| +from gslib.storage_url import ContainsWildcard
|
| +from gslib.storage_url import StorageUrlFromString
|
| +from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
|
| +from gslib.translation_helper import PreconditionsFromHeaders
|
| +
|
# Upper bound on the number of components one composite object may contain;
# substituted into the help text below.
MAX_COMPONENT_COUNT = 1024
# Upper bound on the number of source objects for a single compose call.
MAX_COMPOSE_ARITY = 32

# Usage line, shared by the command spec and the detailed help text.
_SYNOPSIS = """
 gsutil compose gs://bucket/obj1 gs://bucket/obj2 ... gs://bucket/composite
"""

# Synopsis heading followed by the usage line.
_SYNOPSIS_SECTION = """
<B>SYNOPSIS</B>
""" + _SYNOPSIS

# Description section. The two %d placeholders receive the component limit
# and the number of appends that limit allows (limit - 1).
_DESCRIPTION_SECTION = """


<B>DESCRIPTION</B>
 The compose command creates a new object whose content is the concatenation
 of a given sequence of component objects under the same bucket. gsutil uses
 the content type of the first source object to determine the destination
 object's content type. For more information, please see:
 https://developers.google.com/storage/docs/composite-objects

 Note also that the gsutil cp command will automatically split uploads for
 large files into multiple component objects, upload them in parallel, and
 compose them into a final object (which will be subject to the component
 count limit). This will still perform all uploads from a single machine. For
 extremely large files and/or very low per-machine bandwidth, you may want to
 split the file and upload it from multiple machines, and later compose these
 parts of the file manually. See the 'PARALLEL COMPOSITE UPLOADS' section under
 'gsutil help cp' for details.

 Appending simply entails uploading your new data to a temporary object,
 composing it with the growing append-target, and deleting the temporary
 object:

 $ echo 'new data' | gsutil cp - gs://bucket/data-to-append
 $ gsutil compose gs://bucket/append-target gs://bucket/data-to-append \\
 gs://bucket/append-target
 $ gsutil rm gs://bucket/data-to-append

 Note that there is a limit (currently %d) to the number of components for a
 given composite object. This means you can append to each object at most %d
 times.
""" % (MAX_COMPONENT_COUNT, MAX_COMPONENT_COUNT - 1)

# Full help text: synopsis section followed by the description section.
_DETAILED_HELP_TEXT = _SYNOPSIS_SECTION + _DESCRIPTION_SECTION
|
| +
|
| +
|
class ComposeCommand(Command):
  """Implementation of gsutil compose command.

  Builds a compose request from the source object URLs given on the command
  line (the last argument is the destination) and submits it through
  self.gsutil_api.
  """

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'compose',
      command_name_aliases=['concat'],
      usage_synopsis=_SYNOPSIS,
      min_args=2,
      # Up to MAX_COMPOSE_ARITY sources plus the destination.
      max_args=MAX_COMPOSE_ARITY + 1,
      supported_sub_args='',
      # Arguments are cloud object URLs (see _SYNOPSIS); file URLs and
      # provider-only URLs are declared not acceptable.
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='compose',
      help_name_aliases=['concat'],
      help_type='command_help',
      help_one_line_summary=(
          'Concatenate a sequence of objects into a new composite object.'),
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def CheckProvider(self, url):
    """Rejects any URL whose scheme is not 'gs'.

    Args:
      url: Parsed storage URL; only its scheme attribute is inspected.

    Raises:
      CommandException: if url.scheme is not 'gs'.
    """
    if url.scheme != 'gs':
      raise CommandException(
          '"compose" called on URL with unsupported provider (%s).' % str(url))

  # Command entry point.
  def RunCommand(self):
    """Command entry point for the compose command.

    The last argument is the destination object; every preceding argument is
    a source URL, with wildcards expanded into the matching objects. The
    destination's content type is copied from the first source object.

    Raises:
      CommandException: if any URL is not a gs:// URL, the destination is
          version-specific, a source lives in a different bucket than the
          destination, more than MAX_COMPOSE_ARITY components result, or
          fewer than 2 components result.
    """
    target_url_str = self.args[-1]
    # Leave only the source URLs in self.args.
    self.args = self.args[:-1]
    target_url = StorageUrlFromString(target_url_str)
    self.CheckProvider(target_url)
    if target_url.HasGeneration():
      raise CommandException('A version-specific URL (%s) cannot be '
                             'the destination for gsutil compose - abort.'
                             % target_url)

    dst_obj_metadata = apitools_messages.Object(name=target_url.object_name,
                                                bucket=target_url.bucket_name)

    components = []
    # Remember the first source object so we can get its content type.
    first_src_url = None
    for src_url_str in self.args:
      if ContainsWildcard(src_url_str):
        src_url_iter = self.WildcardIterator(src_url_str).IterObjects()
      else:
        src_url_iter = [BucketListingObject(StorageUrlFromString(src_url_str))]
      for blr in src_url_iter:
        src_url = blr.storage_url
        self.CheckProvider(src_url)

        if src_url.bucket_name != target_url.bucket_name:
          raise CommandException(
              'GCS does not support inter-bucket composing.')

        if first_src_url is None:
          first_src_url = src_url
        src_obj_metadata = (
            apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
                name=src_url.object_name))
        if src_url.HasGeneration():
          # The generation parsed out of a URL is presumably a digit string,
          # while the compose request entry presumably needs an integer
          # (the JSON API field is int64) - TODO confirm against
          # storage_v1_messages. int() is a no-op if it is already numeric.
          src_obj_metadata.generation = int(src_url.generation)
        components.append(src_obj_metadata)
        # Avoid expanding too many components, and sanity check each name
        # expansion result. Checking inside the loop stops a large wildcard
        # expansion as soon as the limit is exceeded.
        if len(components) > MAX_COMPOSE_ARITY:
          raise CommandException('"compose" called with too many component '
                                 'objects. Limit is %d.' % MAX_COMPOSE_ARITY)

    if len(components) < 2:
      raise CommandException('"compose" requires at least 2 component objects.')

    # NOTE(review): this lookup does not pass the first source's generation,
    # so for a version-specific first source it presumably reads the live
    # object's content type - confirm whether that is intended.
    dst_obj_metadata.contentType = self.gsutil_api.GetObjectMetadata(
        first_src_url.bucket_name, first_src_url.object_name,
        provider=first_src_url.scheme, fields=['contentType']).contentType

    # self.headers may be unset/empty; fall back to an empty dict.
    preconditions = PreconditionsFromHeaders(self.headers or {})

    self.logger.info(
        'Composing %s from %d component objects.', target_url, len(components))
    self.gsutil_api.ComposeObject(components, dst_obj_metadata,
                                  preconditions=preconditions,
                                  provider=target_url.scheme)
|
|
|