| OLD | NEW |
| 1 # -*- coding: utf-8 -*- |
| 1 # Copyright 2013 Google Inc. All Rights Reserved. | 2 # Copyright 2013 Google Inc. All Rights Reserved. |
| 2 # | 3 # |
| 3 # Licensed under the Apache License, Version 2.0 (the "License"); | 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 # you may not use this file except in compliance with the License. | 5 # you may not use this file except in compliance with the License. |
| 5 # You may obtain a copy of the License at | 6 # You may obtain a copy of the License at |
| 6 # | 7 # |
| 7 # http://www.apache.org/licenses/LICENSE-2.0 | 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 # | 9 # |
| 9 # Unless required by applicable law or agreed to in writing, software | 10 # Unless required by applicable law or agreed to in writing, software |
| 10 # distributed under the License is distributed on an "AS IS" BASIS, | 11 # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 # See the License for the specific language governing permissions and | 13 # See the License for the specific language governing permissions and |
| 13 # limitations under the License. | 14 # limitations under the License. |
| 15 """Implementation of compose command for Google Cloud Storage.""" |
| 14 | 16 |
| 17 from __future__ import absolute_import |
| 18 |
| 19 from gslib.bucket_listing_ref import BucketListingObject |
| 15 from gslib.command import Command | 20 from gslib.command import Command |
| 16 from gslib.command import COMMAND_NAME | 21 from gslib.cs_api_map import ApiSelector |
| 17 from gslib.command import COMMAND_NAME_ALIASES | |
| 18 from gslib.command import FILE_URIS_OK | |
| 19 from gslib.command import MAX_ARGS | |
| 20 from gslib.command import MIN_ARGS | |
| 21 from gslib.command import PROVIDER_URIS_OK | |
| 22 from gslib.command import SUPPORTED_SUB_ARGS | |
| 23 from gslib.command import URIS_START_ARG | |
| 24 from gslib.exception import CommandException | 22 from gslib.exception import CommandException |
| 25 from gslib.help_provider import HELP_NAME | 23 from gslib.storage_url import ContainsWildcard |
| 26 from gslib.help_provider import HELP_NAME_ALIASES | 24 from gslib.storage_url import StorageUrlFromString |
| 27 from gslib.help_provider import HELP_ONE_LINE_SUMMARY | 25 from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages |
| 28 from gslib.help_provider import HELP_TEXT | 26 from gslib.translation_helper import PreconditionsFromHeaders |
| 29 from gslib.help_provider import HelpType | |
| 30 from gslib.help_provider import HELP_TYPE | |
| 31 from gslib.name_expansion import NameExpansionIterator | |
| 32 from boto import storage_uri_for_key | |
| 33 | 27 |
| 34 MAX_COMPONENT_COUNT = 1024 | 28 MAX_COMPONENT_COUNT = 1024 |
| 35 MAX_COMPOSE_ARITY = 32 | 29 MAX_COMPOSE_ARITY = 32 |
| 36 | 30 |
| 37 _detailed_help_text = (""" | 31 _DETAILED_HELP_TEXT = (""" |
| 38 <B>SYNOPSIS</B> | 32 <B>SYNOPSIS</B> |
| 39 gsutil compose gs://bucket/obj1 gs://bucket/obj2 ... gs://bucket/composite | 33 gsutil compose gs://bucket/obj1 gs://bucket/obj2 ... gs://bucket/composite |
| 40 | 34 |
| 41 | 35 |
| 42 <B>DESCRIPTION</B> | 36 <B>DESCRIPTION</B> |
| 43 The compose command creates a new object whose content is the concatenation | 37 The compose command creates a new object whose content is the concatenation |
| 44 of a given sequence of component objects under the same bucket. This is useful | 38 of a given sequence of component objects under the same bucket. gsutil uses |
| 45 for parallel uploading and limited append functionality. For more information, | 39 the content type of the first source object to determine the destination |
| 46 please see: https://developers.google.com/storage/docs/composite-objects | 40 object's content type. For more information, please see: |
| 47 | 41 https://developers.google.com/storage/docs/composite-objects |
| 48 To upload in parallel, split your file into smaller pieces, upload them using | |
| 49 "gsutil -m cp", compose the results, and delete the pieces: | |
| 50 | |
| 51 $ split -n 10 big-file big-file-part- | |
| 52 $ gsutil -m cp big-file-part-* gs://bucket/dir/ | |
| 53 $ rm big-file-part-* | |
| 54 $ gsutil compose gs://bucket/dir/big-file-part-* gs://bucket/dir/big-file | |
| 55 $ gsutil -m rm gs://bucket/dir/big-file-part-* | |
| 56 | |
| 57 Note: The above example causes all file parts to be uploaded from a single | |
| 58 disk on a single machine, which could result in disk or CPU bottlenecks. | |
| 59 Especially when working with very large files, you may be able to achieve | |
| 60 higher performance by spreading the files across multiple disks and/or | |
| 61 running the parallel upload from multiple machines. | |
| 62 | 42 |
| 63 Note also that the gsutil cp command will automatically split uploads for | 43 Note also that the gsutil cp command will automatically split uploads for |
| 64 large files into multiple component objects, upload them in parallel, and | 44 large files into multiple component objects, upload them in parallel, and |
| 65 compose them into a final object (which will also be subject to the component | 45 compose them into a final object (which will be subject to the component |
| 66 count limit). See the 'PARALLEL COMPOSITE UPLOADS'" section under | 46 count limit). This will still perform all uploads from a single machine. For |
| 47 extremely large files and/or very low per-machine bandwidth, you may want to |
| 48 split the file and upload it from multiple machines, and later compose these |
| 49 parts of the file manually. See the 'PARALLEL COMPOSITE UPLOADS' section under |
| 67 'gsutil help cp' for details. | 50 'gsutil help cp' for details. |
| 68 | 51 |
| 69 Appending simply entails uploading your new data to a temporary object, | 52 Appending simply entails uploading your new data to a temporary object, |
| 70 composing it with the growing append-target, and deleting the temporary | 53 composing it with the growing append-target, and deleting the temporary |
| 71 object: | 54 object: |
| 72 | 55 |
| 73 $ echo 'new data' | gsutil cp - gs://bucket/data-to-append | 56 $ echo 'new data' | gsutil cp - gs://bucket/data-to-append |
| 74 $ gsutil compose gs://bucket/append-target gs://bucket/data-to-append \\ | 57 $ gsutil compose gs://bucket/append-target gs://bucket/data-to-append \\ |
| 75 gs://bucket/append-target | 58 gs://bucket/append-target |
| 76 $ gsutil rm gs://bucket/data-to-append | 59 $ gsutil rm gs://bucket/data-to-append |
| 77 | 60 |
| 78 Note that there is a limit (currently %d) to the number of components for a | 61 Note that there is a limit (currently %d) to the number of components for a |
| 79 given composite object. This means you can append to each object at most %d | 62 given composite object. This means you can append to each object at most %d |
| 80 times. | 63 times. |
| 81 """ % (MAX_COMPONENT_COUNT, MAX_COMPONENT_COUNT - 1)) | 64 """ % (MAX_COMPONENT_COUNT, MAX_COMPONENT_COUNT - 1)) |
| 82 | 65 |
| 66 |
| 83 class ComposeCommand(Command): | 67 class ComposeCommand(Command): |
| 84 """Implementation of gsutil compose command.""" | 68 """Implementation of gsutil compose command.""" |
| 85 | 69 |
| 86 # Command specification (processed by parent class). | 70 # Command specification. See base class for documentation. |
| 87 command_spec = { | 71 command_spec = Command.CreateCommandSpec( |
| 88 # Name of command. | 72 'compose', |
| 89 COMMAND_NAME : 'compose', | 73 command_name_aliases=['concat'], |
| 90 # List of command name aliases. | 74 min_args=2, |
| 91 COMMAND_NAME_ALIASES : ['concat'], | 75 max_args=MAX_COMPOSE_ARITY + 1, |
| 92 # Min number of args required by this command. | 76 supported_sub_args='', |
| 93 MIN_ARGS : 2, | 77 # Not files, just object names without gs:// prefix. |
| 94 # Max number of args required by this command, or NO_MAX. | 78 file_url_ok=False, |
| 95 MAX_ARGS : MAX_COMPOSE_ARITY + 1, | 79 provider_url_ok=False, |
| 96 # Getopt-style string specifying acceptable sub args. | 80 urls_start_arg=1, |
| 97 SUPPORTED_SUB_ARGS : '', | 81 gs_api_support=[ApiSelector.XML, ApiSelector.JSON], |
| 98 # True if file URIs acceptable for this command. | 82 gs_default_api=ApiSelector.JSON, |
| 99 FILE_URIS_OK : False, # Not files, just object names without gs:// prefix. | 83 ) |
| 100 # True if provider-only URIs acceptable for this command. | 84 # Help specification. See help_provider.py for documentation. |
| 101 PROVIDER_URIS_OK : False, | 85 help_spec = Command.HelpSpec( |
| 102 # Index in args of first URI arg. | 86 help_name='compose', |
| 103 URIS_START_ARG : 1, | 87 help_name_aliases=['concat'], |
| 104 } | 88 help_type='command_help', |
| 105 help_spec = { | 89 help_one_line_summary=( |
| 106 # Name of command or auxiliary help info for which this help applies. | 90 'Concatenate a sequence of objects into a new composite object.'), |
| 107 HELP_NAME : 'compose', | 91 help_text=_DETAILED_HELP_TEXT, |
| 108 # List of help name aliases. | 92 subcommand_help_text={}, |
| 109 HELP_NAME_ALIASES : ['concat'], | 93 ) |
| 110 # Type of help) | |
| 111 HELP_TYPE : HelpType.COMMAND_HELP, | |
| 112 # One line summary of this help. | |
| 113 HELP_ONE_LINE_SUMMARY : ( | |
| 114 'Concatenate a sequence of objects into a new composite object.'), | |
| 115 # The full help text. | |
| 116 HELP_TEXT : _detailed_help_text, | |
| 117 } | |
| 118 | 94 |
| 119 def CheckSUriProvider(self, suri): | 95 def CheckProvider(self, url): |
| 120 if suri.get_provider().name != 'google': | 96 if url.scheme != 'gs': |
| 121 raise CommandException( | 97 raise CommandException( |
| 122 '"compose" called on URI with unsupported provider (%s).' % str(suri)) | 98 '"compose" called on URL with unsupported provider (%s).' % str(url)) |
| 123 | 99 |
| 124 # Command entry point. | 100 # Command entry point. |
| 125 def RunCommand(self): | 101 def RunCommand(self): |
| 126 target_uri = self.args[-1] | 102 """Command entry point for the compose command.""" |
| 103 target_url_str = self.args[-1] |
| 127 self.args = self.args[:-1] | 104 self.args = self.args[:-1] |
| 128 target_suri = self.suri_builder.StorageUri(target_uri) | 105 target_url = StorageUrlFromString(target_url_str) |
| 129 self.CheckSUriProvider(target_suri) | 106 self.CheckProvider(target_url) |
| 130 if target_suri.is_version_specific: | 107 if target_url.HasGeneration(): |
| 131 raise CommandException('A version-specific URI\n(%s)\ncannot be ' | 108 raise CommandException('A version-specific URL (%s) cannot be ' |
| 132 'the destination for gsutil compose - abort.' | 109 'the destination for gsutil compose - abort.' |
| 133 % target_suri) | 110 % target_url) |
| 134 | 111 |
| 135 name_expansion_iterator = NameExpansionIterator( | 112 dst_obj_metadata = apitools_messages.Object(name=target_url.object_name, |
| 136 self.command_name, self.proj_id_handler, self.headers, self.debug, | 113 bucket=target_url.bucket_name) |
| 137 self.logger, self.bucket_storage_uri_class, self.args, False, | 114 |
| 138 cmd_supports_recursion=False) | |
| 139 components = [] | 115 components = [] |
| 140 for ne_result in name_expansion_iterator: | 116 # Remember the first source object so we can get its content type. |
| 141 suri = self.suri_builder.StorageUri(ne_result.GetExpandedUriStr()) | 117 first_src_url = None |
| 142 self.CheckSUriProvider(suri) | 118 for src_url_str in self.args: |
| 143 components.append(suri) | 119 if ContainsWildcard(src_url_str): |
| 144 # Avoid expanding too many components, and sanity check each name | 120 src_url_iter = self.WildcardIterator(src_url_str).IterObjects() |
| 145 # expansion result. | 121 else: |
| 146 if len(components) > MAX_COMPOSE_ARITY: | 122 src_url_iter = [BucketListingObject(StorageUrlFromString(src_url_str))] |
| 147 raise CommandException('"compose" called with too many component ' | 123 for blr in src_url_iter: |
| 148 'objects. Limit is %d.' % MAX_COMPOSE_ARITY) | 124 src_url = blr.storage_url |
| 125 self.CheckProvider(src_url) |
| 126 |
| 127 if src_url.bucket_name != target_url.bucket_name: |
| 128 raise CommandException( |
| 129 'GCS does not support inter-bucket composing.') |
| 130 |
| 131 if not first_src_url: |
| 132 first_src_url = src_url |
| 133 src_obj_metadata = ( |
| 134 apitools_messages.ComposeRequest.SourceObjectsValueListEntry( |
| 135 name=src_url.object_name)) |
| 136 if src_url.HasGeneration(): |
| 137 src_obj_metadata.generation = src_url.generation |
| 138 components.append(src_obj_metadata) |
| 139 # Avoid expanding too many components, and sanity check each name |
| 140 # expansion result. |
| 141 if len(components) > MAX_COMPOSE_ARITY: |
| 142 raise CommandException('"compose" called with too many component ' |
| 143 'objects. Limit is %d.' % MAX_COMPOSE_ARITY) |
| 144 |
| 149 if len(components) < 2: | 145 if len(components) < 2: |
| 150 raise CommandException('"compose" requires at least 2 component objects.') | 146 raise CommandException('"compose" requires at least 2 component objects.') |
| 151 | 147 |
| 148 dst_obj_metadata.contentType = self.gsutil_api.GetObjectMetadata( |
| 149 first_src_url.bucket_name, first_src_url.object_name, |
| 150 provider=first_src_url.scheme, fields=['contentType']).contentType |
| 151 |
| 152 preconditions = PreconditionsFromHeaders(self.headers or {}) |
| 153 |
| 152 self.logger.info( | 154 self.logger.info( |
| 153 'Composing %s from %d component objects.' % | 155 'Composing %s from %d component objects.', target_url, len(components)) |
| 154 (target_suri, len(components))) | 156 self.gsutil_api.ComposeObject(components, dst_obj_metadata, |
| 155 target_suri.compose(components, headers=self.headers) | 157 preconditions=preconditions, |
| 158 provider=target_url.scheme) |
| OLD | NEW |