| Index: gslib/commands/compose.py
|
| ===================================================================
|
| --- gslib/commands/compose.py (revision 33376)
|
| +++ gslib/commands/compose.py (working copy)
|
| @@ -1,3 +1,4 @@
|
| +# -*- coding: utf-8 -*-
|
| # Copyright 2013 Google Inc. All Rights Reserved.
|
| #
|
| # Licensed under the Apache License, Version 2.0 (the "License");
|
| @@ -11,59 +12,41 @@
|
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| # See the License for the specific language governing permissions and
|
| # limitations under the License.
|
| +"""Implementation of compose command for Google Cloud Storage."""
|
|
|
| +from __future__ import absolute_import
|
| +
|
| +from gslib.bucket_listing_ref import BucketListingObject
|
| from gslib.command import Command
|
| -from gslib.command import COMMAND_NAME
|
| -from gslib.command import COMMAND_NAME_ALIASES
|
| -from gslib.command import FILE_URIS_OK
|
| -from gslib.command import MAX_ARGS
|
| -from gslib.command import MIN_ARGS
|
| -from gslib.command import PROVIDER_URIS_OK
|
| -from gslib.command import SUPPORTED_SUB_ARGS
|
| -from gslib.command import URIS_START_ARG
|
| +from gslib.cs_api_map import ApiSelector
|
| from gslib.exception import CommandException
|
| -from gslib.help_provider import HELP_NAME
|
| -from gslib.help_provider import HELP_NAME_ALIASES
|
| -from gslib.help_provider import HELP_ONE_LINE_SUMMARY
|
| -from gslib.help_provider import HELP_TEXT
|
| -from gslib.help_provider import HelpType
|
| -from gslib.help_provider import HELP_TYPE
|
| -from gslib.name_expansion import NameExpansionIterator
|
| -from boto import storage_uri_for_key
|
| +from gslib.storage_url import ContainsWildcard
|
| +from gslib.storage_url import StorageUrlFromString
|
| +from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
|
| +from gslib.translation_helper import PreconditionsFromHeaders
|
|
|
| MAX_COMPONENT_COUNT = 1024
|
| MAX_COMPOSE_ARITY = 32
|
|
|
| -_detailed_help_text = ("""
|
| +_DETAILED_HELP_TEXT = ("""
|
| <B>SYNOPSIS</B>
|
| gsutil compose gs://bucket/obj1 gs://bucket/obj2 ... gs://bucket/composite
|
|
|
|
|
| <B>DESCRIPTION</B>
|
| The compose command creates a new object whose content is the concatenation
|
| - of a given sequence of component objects under the same bucket. This is useful
|
| - for parallel uploading and limited append functionality. For more information,
|
| - please see: https://developers.google.com/storage/docs/composite-objects
|
| + of a given sequence of component objects under the same bucket. gsutil uses
|
| + the content type of the first source object to determine the destination
|
| + object's content type. For more information, please see:
|
| + https://developers.google.com/storage/docs/composite-objects
|
|
|
| - To upload in parallel, split your file into smaller pieces, upload them using
|
| - "gsutil -m cp", compose the results, and delete the pieces:
|
| -
|
| - $ split -n 10 big-file big-file-part-
|
| - $ gsutil -m cp big-file-part-* gs://bucket/dir/
|
| - $ rm big-file-part-*
|
| - $ gsutil compose gs://bucket/dir/big-file-part-* gs://bucket/dir/big-file
|
| - $ gsutil -m rm gs://bucket/dir/big-file-part-*
|
| -
|
| - Note: The above example causes all file parts to be uploaded from a single
|
| - disk on a single machine, which could result in disk or CPU bottlenecks.
|
| - Especially when working with very large files, you may be able to achieve
|
| - higher performance by spreading the files across multiple disks and/or
|
| - running the parallel upload from multiple machines.
|
| -
|
| Note also that the gsutil cp command will automatically split uploads for
|
| large files into multiple component objects, upload them in parallel, and
|
| - compose them into a final object (which will also be subject to the component
|
| - count limit). See the 'PARALLEL COMPOSITE UPLOADS'" section under
|
| + compose them into a final object (which will be subject to the component
|
| + count limit). This will still perform all uploads from a single machine. For
|
| + extremely large files and/or very low per-machine bandwidth, you may want to
|
| + split the file and upload it from multiple machines, and later compose these
|
| + parts of the file manually. See the 'PARALLEL COMPOSITE UPLOADS' section under
|
| 'gsutil help cp' for details.
|
|
|
| Appending simply entails uploading your new data to a temporary object,
|
| @@ -80,76 +63,96 @@
|
| times.
|
| """ % (MAX_COMPONENT_COUNT, MAX_COMPONENT_COUNT - 1))
|
|
|
| +
|
| class ComposeCommand(Command):
|
| """Implementation of gsutil compose command."""
|
|
|
| - # Command specification (processed by parent class).
|
| - command_spec = {
|
| - # Name of command.
|
| - COMMAND_NAME : 'compose',
|
| - # List of command name aliases.
|
| - COMMAND_NAME_ALIASES : ['concat'],
|
| - # Min number of args required by this command.
|
| - MIN_ARGS : 2,
|
| - # Max number of args required by this command, or NO_MAX.
|
| - MAX_ARGS : MAX_COMPOSE_ARITY + 1,
|
| - # Getopt-style string specifying acceptable sub args.
|
| - SUPPORTED_SUB_ARGS : '',
|
| - # True if file URIs acceptable for this command.
|
| - FILE_URIS_OK : False, # Not files, just object names without gs:// prefix.
|
| - # True if provider-only URIs acceptable for this command.
|
| - PROVIDER_URIS_OK : False,
|
| - # Index in args of first URI arg.
|
| - URIS_START_ARG : 1,
|
| - }
|
| - help_spec = {
|
| - # Name of command or auxiliary help info for which this help applies.
|
| - HELP_NAME : 'compose',
|
| - # List of help name aliases.
|
| - HELP_NAME_ALIASES : ['concat'],
|
| - # Type of help)
|
| - HELP_TYPE : HelpType.COMMAND_HELP,
|
| - # One line summary of this help.
|
| - HELP_ONE_LINE_SUMMARY : (
|
| - 'Concatenate a sequence of objects into a new composite object.'),
|
| - # The full help text.
|
| - HELP_TEXT : _detailed_help_text,
|
| - }
|
| + # Command specification. See base class for documentation.
|
| + command_spec = Command.CreateCommandSpec(
|
| + 'compose',
|
| + command_name_aliases=['concat'],
|
| + min_args=2,
|
| + max_args=MAX_COMPOSE_ARITY + 1,
|
| + supported_sub_args='',
|
| + # Not files, just object names without gs:// prefix.
|
| + file_url_ok=False,
|
| + provider_url_ok=False,
|
| + urls_start_arg=1,
|
| + gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
|
| + gs_default_api=ApiSelector.JSON,
|
| + )
|
| + # Help specification. See help_provider.py for documentation.
|
| + help_spec = Command.HelpSpec(
|
| + help_name='compose',
|
| + help_name_aliases=['concat'],
|
| + help_type='command_help',
|
| + help_one_line_summary=(
|
| + 'Concatenate a sequence of objects into a new composite object.'),
|
| + help_text=_DETAILED_HELP_TEXT,
|
| + subcommand_help_text={},
|
| + )
|
|
|
| - def CheckSUriProvider(self, suri):
|
| - if suri.get_provider().name != 'google':
|
| + def CheckProvider(self, url):
|
| + if url.scheme != 'gs':
|
| raise CommandException(
|
| - '"compose" called on URI with unsupported provider (%s).' % str(suri))
|
| + '"compose" called on URL with unsupported provider (%s).' % str(url))
|
|
|
| # Command entry point.
|
| def RunCommand(self):
|
| - target_uri = self.args[-1]
|
| + """Command entry point for the compose command."""
|
| + target_url_str = self.args[-1]
|
| self.args = self.args[:-1]
|
| - target_suri = self.suri_builder.StorageUri(target_uri)
|
| - self.CheckSUriProvider(target_suri)
|
| - if target_suri.is_version_specific:
|
| - raise CommandException('A version-specific URI\n(%s)\ncannot be '
|
| + target_url = StorageUrlFromString(target_url_str)
|
| + self.CheckProvider(target_url)
|
| + if target_url.HasGeneration():
|
| + raise CommandException('A version-specific URL (%s) cannot be '
|
| 'the destination for gsutil compose - abort.'
|
| - % target_suri)
|
| + % target_url)
|
|
|
| - name_expansion_iterator = NameExpansionIterator(
|
| - self.command_name, self.proj_id_handler, self.headers, self.debug,
|
| - self.logger, self.bucket_storage_uri_class, self.args, False,
|
| - cmd_supports_recursion=False)
|
| + dst_obj_metadata = apitools_messages.Object(name=target_url.object_name,
|
| + bucket=target_url.bucket_name)
|
| +
|
| components = []
|
| - for ne_result in name_expansion_iterator:
|
| - suri = self.suri_builder.StorageUri(ne_result.GetExpandedUriStr())
|
| - self.CheckSUriProvider(suri)
|
| - components.append(suri)
|
| - # Avoid expanding too many components, and sanity check each name
|
| - # expansion result.
|
| - if len(components) > MAX_COMPOSE_ARITY:
|
| - raise CommandException('"compose" called with too many component '
|
| - 'objects. Limit is %d.' % MAX_COMPOSE_ARITY)
|
| + # Remember the first source object so we can get its content type.
|
| + first_src_url = None
|
| + for src_url_str in self.args:
|
| + if ContainsWildcard(src_url_str):
|
| + src_url_iter = self.WildcardIterator(src_url_str).IterObjects()
|
| + else:
|
| + src_url_iter = [BucketListingObject(StorageUrlFromString(src_url_str))]
|
| + for blr in src_url_iter:
|
| + src_url = blr.storage_url
|
| + self.CheckProvider(src_url)
|
| +
|
| + if src_url.bucket_name != target_url.bucket_name:
|
| + raise CommandException(
|
| + 'GCS does not support inter-bucket composing.')
|
| +
|
| + if not first_src_url:
|
| + first_src_url = src_url
|
| + src_obj_metadata = (
|
| + apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
|
| + name=src_url.object_name))
|
| + if src_url.HasGeneration():
|
| + src_obj_metadata.generation = src_url.generation
|
| + components.append(src_obj_metadata)
|
| + # Avoid expanding too many components, and sanity check each expanded
|
| + # source URL.
|
| + if len(components) > MAX_COMPOSE_ARITY:
|
| + raise CommandException('"compose" called with too many component '
|
| + 'objects. Limit is %d.' % MAX_COMPOSE_ARITY)
|
| +
|
| if len(components) < 2:
|
| raise CommandException('"compose" requires at least 2 component objects.')
|
|
|
| + dst_obj_metadata.contentType = self.gsutil_api.GetObjectMetadata(
|
| + first_src_url.bucket_name, first_src_url.object_name,
|
| + provider=first_src_url.scheme, fields=['contentType']).contentType
|
| +
|
| + preconditions = PreconditionsFromHeaders(self.headers or {})
|
| +
|
| self.logger.info(
|
| - 'Composing %s from %d component objects.' %
|
| - (target_suri, len(components)))
|
| - target_suri.compose(components, headers=self.headers)
|
| + 'Composing %s from %d component objects.', target_url, len(components))
|
| + self.gsutil_api.ComposeObject(components, dst_obj_metadata,
|
| + preconditions=preconditions,
|
| + provider=target_url.scheme)
|
|
|