OLD | NEW |
(Empty) | |
| 1 # -*- coding: utf-8 -*- |
| 2 # Copyright 2013 Google Inc. All Rights Reserved. |
| 3 # |
| 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 # you may not use this file except in compliance with the License. |
| 6 # You may obtain a copy of the License at |
| 7 # |
| 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 # |
| 10 # Unless required by applicable law or agreed to in writing, software |
| 11 # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 # See the License for the specific language governing permissions and |
| 14 # limitations under the License. |
| 15 """Implementation of compose command for Google Cloud Storage.""" |
| 16 |
| 17 from __future__ import absolute_import |
| 18 |
| 19 from gslib.bucket_listing_ref import BucketListingObject |
| 20 from gslib.command import Command |
| 21 from gslib.command_argument import CommandArgument |
| 22 from gslib.cs_api_map import ApiSelector |
| 23 from gslib.exception import CommandException |
| 24 from gslib.storage_url import ContainsWildcard |
| 25 from gslib.storage_url import StorageUrlFromString |
| 26 from gslib.third_party.storage_apitools import storage_v1_messages as apitools_m
essages |
| 27 from gslib.translation_helper import PreconditionsFromHeaders |
| 28 |
# Limit on the number of components a single composite object may contain.
# Interpolated into the help text below.
MAX_COMPONENT_COUNT = 1024
# Limit on the number of source objects accepted by one compose invocation.
# Used for the command's max_args and enforced in ComposeCommand.RunCommand.
MAX_COMPOSE_ARITY = 32

# Usage line; shared by the command spec (usage_synopsis) and the detailed
# help text.
# NOTE(review): leading indentation inside the help strings below appears to
# have been stripped by the extraction this file came from - confirm the
# in-string whitespace against the original source before relying on it.
_SYNOPSIS = """
gsutil compose gs://bucket/obj1 gs://bucket/obj2 ... gs://bucket/composite
"""

# Full text shown for "gsutil help compose". The % operator binds more tightly
# than +, so the format arguments apply only to the final string literal, which
# is the one holding both %d placeholders (MAX_COMPONENT_COUNT and
# MAX_COMPONENT_COUNT - 1).
_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """


<B>DESCRIPTION</B>
The compose command creates a new object whose content is the concatenation
of a given sequence of component objects under the same bucket. gsutil uses
the content type of the first source object to determine the destination
object's content type. For more information, please see:
https://developers.google.com/storage/docs/composite-objects

Note also that the gsutil cp command will automatically split uploads for
large files into multiple component objects, upload them in parallel, and
compose them into a final object (which will be subject to the component
count limit). This will still perform all uploads from a single machine. For
extremely large files and/or very low per-machine bandwidth, you may want to
split the file and upload it from multiple machines, and later compose these
parts of the file manually. See the 'PARALLEL COMPOSITE UPLOADS' section under
'gsutil help cp' for details.

Appending simply entails uploading your new data to a temporary object,
composing it with the growing append-target, and deleting the temporary
object:

$ echo 'new data' | gsutil cp - gs://bucket/data-to-append
$ gsutil compose gs://bucket/append-target gs://bucket/data-to-append \\
gs://bucket/append-target
$ gsutil rm gs://bucket/data-to-append

Note that there is a limit (currently %d) to the number of components for a
given composite object. This means you can append to each object at most %d
times.
""" % (MAX_COMPONENT_COUNT, MAX_COMPONENT_COUNT - 1))
| 70 |
| 71 |
class ComposeCommand(Command):
  """gsutil command that concatenates existing objects into a composite."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'compose',
      command_name_aliases=['concat'],
      usage_synopsis=_SYNOPSIS,
      min_args=2,
      # Up to MAX_COMPOSE_ARITY source arguments plus the one destination.
      max_args=MAX_COMPOSE_ARITY + 1,
      supported_sub_args='',
      # Arguments are cloud object URLs (see the synopsis); file URLs and
      # provider-only URLs are both switched off.
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='compose',
      help_name_aliases=['concat'],
      help_type='command_help',
      help_one_line_summary=(
          'Concatenate a sequence of objects into a new composite object.'),
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def CheckProvider(self, url):
    """Rejects any URL whose scheme is not 'gs'.

    Args:
      url: URL object exposing a `scheme` attribute.

    Raises:
      CommandException: if url.scheme is anything other than 'gs'.
    """
    if url.scheme == 'gs':
      return
    raise CommandException(
        '"compose" called on URL with unsupported provider (%s).' % str(url))

  # Command entry point.
  def RunCommand(self):
    """Composes the source URLs in self.args into the final argument.

    The last argument is taken as the destination URL and removed from
    self.args; every remaining argument is a source URL. A source containing
    a wildcard is expanded through WildcardIterator and contributes one
    component per listing result. The destination's content type is copied
    from the first source object.

    Raises:
      CommandException: if a URL is not a gs:// URL, the destination carries
          a generation, a source is in a different bucket than the
          destination, more than MAX_COMPOSE_ARITY components result, or
          fewer than 2 components result.
    """
    # Split the arguments: trailing destination, leading sources.
    dst_url_str = self.args[-1]
    self.args = self.args[:-1]
    dst_url = StorageUrlFromString(dst_url_str)
    self.CheckProvider(dst_url)
    if dst_url.HasGeneration():
      raise CommandException('A version-specific URL (%s) cannot be '
                             'the destination for gsutil compose - abort.'
                             % dst_url)

    dst_obj_metadata = apitools_messages.Object(
        name=dst_url.object_name, bucket=dst_url.bucket_name)

    source_entries = []
    # Tracks the first source so its content type can be looked up afterwards.
    first_src_url = None
    for url_str in self.args:
      listing = (
          self.WildcardIterator(url_str).IterObjects()
          if ContainsWildcard(url_str)
          else [BucketListingObject(StorageUrlFromString(url_str))])
      for listing_ref in listing:
        src_url = listing_ref.storage_url
        self.CheckProvider(src_url)

        if src_url.bucket_name != dst_url.bucket_name:
          raise CommandException(
              'GCS does not support inter-bucket composing.')

        first_src_url = first_src_url or src_url

        entry = apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
            name=src_url.object_name)
        if src_url.HasGeneration():
          entry.generation = src_url.generation
        source_entries.append(entry)

        # Checked after every append so that an expansion producing too many
        # components is rejected as soon as the limit is crossed.
        if len(source_entries) > MAX_COMPOSE_ARITY:
          raise CommandException('"compose" called with too many component '
                                 'objects. Limit is %d.' % MAX_COMPOSE_ARITY)

    if len(source_entries) < 2:
      raise CommandException('"compose" requires at least 2 component objects.')

    # Only the contentType field of the first source is requested.
    first_src_metadata = self.gsutil_api.GetObjectMetadata(
        first_src_url.bucket_name, first_src_url.object_name,
        provider=first_src_url.scheme, fields=['contentType'])
    dst_obj_metadata.contentType = first_src_metadata.contentType

    preconditions = PreconditionsFromHeaders(self.headers or {})

    self.logger.info(
        'Composing %s from %d component objects.', dst_url,
        len(source_entries))
    self.gsutil_api.ComposeObject(source_entries, dst_obj_metadata,
                                  preconditions=preconditions,
                                  provider=dst_url.scheme)
OLD | NEW |