Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(403)

Side by Side Diff: gslib/commands/compose.py

Issue 698893003: Update checked in version of gsutil to version 4.6 (Closed) Base URL: http://dart.googlecode.com/svn/third_party/gsutil/
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « gslib/commands/cat.py ('k') | gslib/commands/config.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # -*- coding: utf-8 -*-
1 # Copyright 2013 Google Inc. All Rights Reserved. 2 # Copyright 2013 Google Inc. All Rights Reserved.
2 # 3 #
3 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License. 5 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at 6 # You may obtain a copy of the License at
6 # 7 #
7 # http://www.apache.org/licenses/LICENSE-2.0 8 # http://www.apache.org/licenses/LICENSE-2.0
8 # 9 #
9 # Unless required by applicable law or agreed to in writing, software 10 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS, 11 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and 13 # See the License for the specific language governing permissions and
13 # limitations under the License. 14 # limitations under the License.
15 """Implementation of compose command for Google Cloud Storage."""
14 16
17 from __future__ import absolute_import
18
19 from gslib.bucket_listing_ref import BucketListingObject
15 from gslib.command import Command 20 from gslib.command import Command
16 from gslib.command import COMMAND_NAME 21 from gslib.cs_api_map import ApiSelector
17 from gslib.command import COMMAND_NAME_ALIASES
18 from gslib.command import FILE_URIS_OK
19 from gslib.command import MAX_ARGS
20 from gslib.command import MIN_ARGS
21 from gslib.command import PROVIDER_URIS_OK
22 from gslib.command import SUPPORTED_SUB_ARGS
23 from gslib.command import URIS_START_ARG
24 from gslib.exception import CommandException 22 from gslib.exception import CommandException
25 from gslib.help_provider import HELP_NAME 23 from gslib.storage_url import ContainsWildcard
26 from gslib.help_provider import HELP_NAME_ALIASES 24 from gslib.storage_url import StorageUrlFromString
27 from gslib.help_provider import HELP_ONE_LINE_SUMMARY 25 from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
28 from gslib.help_provider import HELP_TEXT 26 from gslib.translation_helper import PreconditionsFromHeaders
29 from gslib.help_provider import HelpType
30 from gslib.help_provider import HELP_TYPE
31 from gslib.name_expansion import NameExpansionIterator
32 from boto import storage_uri_for_key
33 27
34 MAX_COMPONENT_COUNT = 1024 28 MAX_COMPONENT_COUNT = 1024
35 MAX_COMPOSE_ARITY = 32 29 MAX_COMPOSE_ARITY = 32
36 30
37 _detailed_help_text = (""" 31 _DETAILED_HELP_TEXT = ("""
38 <B>SYNOPSIS</B> 32 <B>SYNOPSIS</B>
39 gsutil compose gs://bucket/obj1 gs://bucket/obj2 ... gs://bucket/composite 33 gsutil compose gs://bucket/obj1 gs://bucket/obj2 ... gs://bucket/composite
40 34
41 35
42 <B>DESCRIPTION</B> 36 <B>DESCRIPTION</B>
43 The compose command creates a new object whose content is the concatenation 37 The compose command creates a new object whose content is the concatenation
44 of a given sequence of component objects under the same bucket. This is useful 38 of a given sequence of component objects under the same bucket. gsutil uses
45 for parallel uploading and limited append functionality. For more information, 39 the content type of the first source object to determine the destination
46 please see: https://developers.google.com/storage/docs/composite-objects 40 object's content type. For more information, please see:
47 41 https://developers.google.com/storage/docs/composite-objects
48 To upload in parallel, split your file into smaller pieces, upload them using
49 "gsutil -m cp", compose the results, and delete the pieces:
50
51 $ split -n 10 big-file big-file-part-
52 $ gsutil -m cp big-file-part-* gs://bucket/dir/
53 $ rm big-file-part-*
54 $ gsutil compose gs://bucket/dir/big-file-part-* gs://bucket/dir/big-file
55 $ gsutil -m rm gs://bucket/dir/big-file-part-*
56
57 Note: The above example causes all file parts to be uploaded from a single
58 disk on a single machine, which could result in disk or CPU bottlenecks.
59 Especially when working with very large files, you may be able to achieve
60 higher performance by spreading the files across multiple disks and/or
61 running the parallel upload from multiple machines.
62 42
63 Note also that the gsutil cp command will automatically split uploads for 43 Note also that the gsutil cp command will automatically split uploads for
64 large files into multiple component objects, upload them in parallel, and 44 large files into multiple component objects, upload them in parallel, and
65 compose them into a final object (which will also be subject to the component 45 compose them into a final object (which will be subject to the component
66 count limit). See the 'PARALLEL COMPOSITE UPLOADS'" section under 46 count limit). This will still perform all uploads from a single machine. For
47 extremely large files and/or very low per-machine bandwidth, you may want to
48 split the file and upload it from multiple machines, and later compose these
49 parts of the file manually. See the 'PARALLEL COMPOSITE UPLOADS' section under
67 'gsutil help cp' for details. 50 'gsutil help cp' for details.
68 51
69 Appending simply entails uploading your new data to a temporary object, 52 Appending simply entails uploading your new data to a temporary object,
70 composing it with the growing append-target, and deleting the temporary 53 composing it with the growing append-target, and deleting the temporary
71 object: 54 object:
72 55
73 $ echo 'new data' | gsutil cp - gs://bucket/data-to-append 56 $ echo 'new data' | gsutil cp - gs://bucket/data-to-append
74 $ gsutil compose gs://bucket/append-target gs://bucket/data-to-append \\ 57 $ gsutil compose gs://bucket/append-target gs://bucket/data-to-append \\
75 gs://bucket/append-target 58 gs://bucket/append-target
76 $ gsutil rm gs://bucket/data-to-append 59 $ gsutil rm gs://bucket/data-to-append
77 60
78 Note that there is a limit (currently %d) to the number of components for a 61 Note that there is a limit (currently %d) to the number of components for a
79 given composite object. This means you can append to each object at most %d 62 given composite object. This means you can append to each object at most %d
80 times. 63 times.
81 """ % (MAX_COMPONENT_COUNT, MAX_COMPONENT_COUNT - 1)) 64 """ % (MAX_COMPONENT_COUNT, MAX_COMPONENT_COUNT - 1))
82 65
66
83 class ComposeCommand(Command): 67 class ComposeCommand(Command):
84 """Implementation of gsutil compose command.""" 68 """Implementation of gsutil compose command."""
85 69
86 # Command specification (processed by parent class). 70 # Command specification. See base class for documentation.
87 command_spec = { 71 command_spec = Command.CreateCommandSpec(
88 # Name of command. 72 'compose',
89 COMMAND_NAME : 'compose', 73 command_name_aliases=['concat'],
90 # List of command name aliases. 74 min_args=2,
91 COMMAND_NAME_ALIASES : ['concat'], 75 max_args=MAX_COMPOSE_ARITY + 1,
92 # Min number of args required by this command. 76 supported_sub_args='',
93 MIN_ARGS : 2, 77 # Not files, just object names without gs:// prefix.
94 # Max number of args required by this command, or NO_MAX. 78 file_url_ok=False,
95 MAX_ARGS : MAX_COMPOSE_ARITY + 1, 79 provider_url_ok=False,
96 # Getopt-style string specifying acceptable sub args. 80 urls_start_arg=1,
97 SUPPORTED_SUB_ARGS : '', 81 gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
98 # True if file URIs acceptable for this command. 82 gs_default_api=ApiSelector.JSON,
99 FILE_URIS_OK : False, # Not files, just object names without gs:// prefix. 83 )
100 # True if provider-only URIs acceptable for this command. 84 # Help specification. See help_provider.py for documentation.
101 PROVIDER_URIS_OK : False, 85 help_spec = Command.HelpSpec(
102 # Index in args of first URI arg. 86 help_name='compose',
103 URIS_START_ARG : 1, 87 help_name_aliases=['concat'],
104 } 88 help_type='command_help',
105 help_spec = { 89 help_one_line_summary=(
106 # Name of command or auxiliary help info for which this help applies. 90 'Concatenate a sequence of objects into a new composite object.'),
107 HELP_NAME : 'compose', 91 help_text=_DETAILED_HELP_TEXT,
108 # List of help name aliases. 92 subcommand_help_text={},
109 HELP_NAME_ALIASES : ['concat'], 93 )
110 # Type of help)
111 HELP_TYPE : HelpType.COMMAND_HELP,
112 # One line summary of this help.
113 HELP_ONE_LINE_SUMMARY : (
114 'Concatenate a sequence of objects into a new composite object.'),
115 # The full help text.
116 HELP_TEXT : _detailed_help_text,
117 }
118 94
119 def CheckSUriProvider(self, suri): 95 def CheckProvider(self, url):
120 if suri.get_provider().name != 'google': 96 if url.scheme != 'gs':
121 raise CommandException( 97 raise CommandException(
122 '"compose" called on URI with unsupported provider (%s).' % str(suri)) 98 '"compose" called on URL with unsupported provider (%s).' % str(url))
123 99
124 # Command entry point. 100 # Command entry point.
125 def RunCommand(self): 101 def RunCommand(self):
126 target_uri = self.args[-1] 102 """Command entry point for the compose command."""
103 target_url_str = self.args[-1]
127 self.args = self.args[:-1] 104 self.args = self.args[:-1]
128 target_suri = self.suri_builder.StorageUri(target_uri) 105 target_url = StorageUrlFromString(target_url_str)
129 self.CheckSUriProvider(target_suri) 106 self.CheckProvider(target_url)
130 if target_suri.is_version_specific: 107 if target_url.HasGeneration():
131 raise CommandException('A version-specific URI\n(%s)\ncannot be ' 108 raise CommandException('A version-specific URL (%s) cannot be '
132 'the destination for gsutil compose - abort.' 109 'the destination for gsutil compose - abort.'
133 % target_suri) 110 % target_url)
134 111
135 name_expansion_iterator = NameExpansionIterator( 112 dst_obj_metadata = apitools_messages.Object(name=target_url.object_name,
136 self.command_name, self.proj_id_handler, self.headers, self.debug, 113 bucket=target_url.bucket_name)
137 self.logger, self.bucket_storage_uri_class, self.args, False, 114
138 cmd_supports_recursion=False)
139 components = [] 115 components = []
140 for ne_result in name_expansion_iterator: 116 # Remember the first source object so we can get its content type.
141 suri = self.suri_builder.StorageUri(ne_result.GetExpandedUriStr()) 117 first_src_url = None
142 self.CheckSUriProvider(suri) 118 for src_url_str in self.args:
143 components.append(suri) 119 if ContainsWildcard(src_url_str):
144 # Avoid expanding too many components, and sanity check each name 120 src_url_iter = self.WildcardIterator(src_url_str).IterObjects()
145 # expansion result. 121 else:
146 if len(components) > MAX_COMPOSE_ARITY: 122 src_url_iter = [BucketListingObject(StorageUrlFromString(src_url_str))]
147 raise CommandException('"compose" called with too many component ' 123 for blr in src_url_iter:
148 'objects. Limit is %d.' % MAX_COMPOSE_ARITY) 124 src_url = blr.storage_url
125 self.CheckProvider(src_url)
126
127 if src_url.bucket_name != target_url.bucket_name:
128 raise CommandException(
129 'GCS does not support inter-bucket composing.')
130
131 if not first_src_url:
132 first_src_url = src_url
133 src_obj_metadata = (
134 apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
135 name=src_url.object_name))
136 if src_url.HasGeneration():
137 src_obj_metadata.generation = src_url.generation
138 components.append(src_obj_metadata)
139 # Avoid expanding too many components, and sanity check each name
140 # expansion result.
141 if len(components) > MAX_COMPOSE_ARITY:
142 raise CommandException('"compose" called with too many component '
143 'objects. Limit is %d.' % MAX_COMPOSE_ARITY)
144
149 if len(components) < 2: 145 if len(components) < 2:
150 raise CommandException('"compose" requires at least 2 component objects.') 146 raise CommandException('"compose" requires at least 2 component objects.')
151 147
148 dst_obj_metadata.contentType = self.gsutil_api.GetObjectMetadata(
149 first_src_url.bucket_name, first_src_url.object_name,
150 provider=first_src_url.scheme, fields=['contentType']).contentType
151
152 preconditions = PreconditionsFromHeaders(self.headers or {})
153
152 self.logger.info( 154 self.logger.info(
153 'Composing %s from %d component objects.' % 155 'Composing %s from %d component objects.', target_url, len(components))
154 (target_suri, len(components))) 156 self.gsutil_api.ComposeObject(components, dst_obj_metadata,
155 target_suri.compose(components, headers=self.headers) 157 preconditions=preconditions,
158 provider=target_url.scheme)
OLDNEW
« no previous file with comments | « gslib/commands/cat.py ('k') | gslib/commands/config.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698