Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(173)

Unified Diff: third_party/gsutil/gslib/commands/compose.py

Issue 1377933002: [catapult] - Copy Telemetry's gsutilz over to third_party. (Closed) Base URL: https://github.com/catapult-project/catapult.git@master
Patch Set: Rename to gsutil. Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/gsutil/gslib/commands/cat.py ('k') | third_party/gsutil/gslib/commands/config.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/gsutil/gslib/commands/compose.py
diff --git a/third_party/gsutil/gslib/commands/compose.py b/third_party/gsutil/gslib/commands/compose.py
new file mode 100644
index 0000000000000000000000000000000000000000..e33261fe8ce168118ab8fdc50e070a3cc306c023
--- /dev/null
+++ b/third_party/gsutil/gslib/commands/compose.py
@@ -0,0 +1,167 @@
+# -*- coding: utf-8 -*-
+# Copyright 2013 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Implementation of compose command for Google Cloud Storage."""
+
+from __future__ import absolute_import
+
+from gslib.bucket_listing_ref import BucketListingObject
+from gslib.command import Command
+from gslib.command_argument import CommandArgument
+from gslib.cs_api_map import ApiSelector
+from gslib.exception import CommandException
+from gslib.storage_url import ContainsWildcard
+from gslib.storage_url import StorageUrlFromString
+from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
+from gslib.translation_helper import PreconditionsFromHeaders
+
+# Limit on the number of components a single composite object may have;
+# interpolated into the detailed help text below.
+MAX_COMPONENT_COUNT = 1024
+# Limit on the number of source objects accepted by one compose invocation;
+# enforced in ComposeCommand.RunCommand and used to derive max_args.
+MAX_COMPOSE_ARITY = 32
+
+# Usage line, shared by the command spec and the detailed help text.
+_SYNOPSIS = """
+ gsutil compose gs://bucket/obj1 gs://bucket/obj2 ... gs://bucket/composite
+"""
+
+# Long-form help body, supplied as help_text in ComposeCommand.help_spec.
+# The two %d placeholders in the final paragraph are filled with
+# MAX_COMPONENT_COUNT and MAX_COMPONENT_COUNT - 1 when the module is loaded.
+_DETAILED_HELP_TEXT = ("""
+<B>SYNOPSIS</B>
+""" + _SYNOPSIS + """
+
+
+<B>DESCRIPTION</B>
+ The compose command creates a new object whose content is the concatenation
+ of a given sequence of component objects under the same bucket. gsutil uses
+ the content type of the first source object to determine the destination
+ object's content type. For more information, please see:
+ https://developers.google.com/storage/docs/composite-objects
+
+ Note also that the gsutil cp command will automatically split uploads for
+ large files into multiple component objects, upload them in parallel, and
+ compose them into a final object (which will be subject to the component
+ count limit). This will still perform all uploads from a single machine. For
+ extremely large files and/or very low per-machine bandwidth, you may want to
+ split the file and upload it from multiple machines, and later compose these
+ parts of the file manually. See the 'PARALLEL COMPOSITE UPLOADS' section under
+ 'gsutil help cp' for details.
+
+ Appending simply entails uploading your new data to a temporary object,
+ composing it with the growing append-target, and deleting the temporary
+ object:
+
+ $ echo 'new data' | gsutil cp - gs://bucket/data-to-append
+ $ gsutil compose gs://bucket/append-target gs://bucket/data-to-append \\
+ gs://bucket/append-target
+ $ gsutil rm gs://bucket/data-to-append
+
+ Note that there is a limit (currently %d) to the number of components for a
+ given composite object. This means you can append to each object at most %d
+ times.
+""" % (MAX_COMPONENT_COUNT, MAX_COMPONENT_COUNT - 1))
+
+
+class ComposeCommand(Command):
+  """gsutil command that concatenates source objects into one composite."""
+
+  # Command registration data. See the Command base class for field meanings.
+  command_spec = Command.CreateCommandSpec(
+      'compose',
+      command_name_aliases=['concat'],
+      usage_synopsis=_SYNOPSIS,
+      min_args=2,
+      # Up to MAX_COMPOSE_ARITY sources plus the single destination URL.
+      max_args=MAX_COMPOSE_ARITY + 1,
+      supported_sub_args='',
+      # File URLs and provider-only URLs are flagged as not acceptable here.
+      file_url_ok=False,
+      provider_url_ok=False,
+      urls_start_arg=1,
+      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
+      gs_default_api=ApiSelector.JSON,
+      argparse_arguments=[
+          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
+      ]
+  )
+  # Help registration data. See help_provider.py for field meanings.
+  help_spec = Command.HelpSpec(
+      help_name='compose',
+      help_name_aliases=['concat'],
+      help_type='command_help',
+      help_one_line_summary=(
+          'Concatenate a sequence of objects into a new composite object.'),
+      help_text=_DETAILED_HELP_TEXT,
+      subcommand_help_text={},
+  )
+
+  def CheckProvider(self, url):
+    """Rejects any URL whose scheme is not 'gs'.
+
+    Args:
+      url: URL object exposing a `scheme` attribute.
+
+    Raises:
+      CommandException: if url.scheme is anything other than 'gs'.
+    """
+    if url.scheme == 'gs':
+      return
+    raise CommandException(
+        '"compose" called on URL with unsupported provider (%s).' % str(url))
+
+  def RunCommand(self):
+    """Composes the source objects named in self.args into the last URL.
+
+    The final argument is the destination; every preceding argument is a
+    source and may contain wildcards. As a side effect self.args is rebound
+    to the source arguments only.
+
+    Raises:
+      CommandException: if any URL is not a 'gs' URL, the destination carries
+          a generation, a source is in a different bucket than the
+          destination, or the expanded source count is below 2 or above
+          MAX_COMPOSE_ARITY.
+    """
+    # Split off the destination; what remains in self.args are the sources.
+    dst_url_str, self.args = self.args[-1], self.args[:-1]
+    dst_url = StorageUrlFromString(dst_url_str)
+    self.CheckProvider(dst_url)
+    if dst_url.HasGeneration():
+      raise CommandException('A version-specific URL (%s) cannot be '
+                             'the destination for gsutil compose - abort.'
+                             % dst_url)
+
+    composite_metadata = apitools_messages.Object(
+        name=dst_url.object_name, bucket=dst_url.bucket_name)
+
+    source_entries = []
+    # The first expanded source later supplies the destination content type.
+    first_src_url = None
+    for arg in self.args:
+      if not ContainsWildcard(arg):
+        listing = [BucketListingObject(StorageUrlFromString(arg))]
+      else:
+        listing = self.WildcardIterator(arg).IterObjects()
+      for listed in listing:
+        src_url = listed.storage_url
+        self.CheckProvider(src_url)
+
+        if src_url.bucket_name != dst_url.bucket_name:
+          raise CommandException(
+              'GCS does not support inter-bucket composing.')
+
+        first_src_url = first_src_url or src_url
+        entry = (
+            apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
+                name=src_url.object_name))
+        if src_url.HasGeneration():
+          entry.generation = src_url.generation
+        source_entries.append(entry)
+        # Checked after every append so a wildcard that expands past the
+        # limit stops immediately instead of being listed to the end.
+        if len(source_entries) > MAX_COMPOSE_ARITY:
+          raise CommandException('"compose" called with too many component '
+                                 'objects. Limit is %d.' % MAX_COMPOSE_ARITY)
+
+    if len(source_entries) < 2:
+      raise CommandException('"compose" requires at least 2 component objects.')
+
+    # NOTE(review): this lookup passes no generation even when the first
+    # source URL carries one -- confirm that is the intended content type.
+    first_src_metadata = self.gsutil_api.GetObjectMetadata(
+        first_src_url.bucket_name, first_src_url.object_name,
+        provider=first_src_url.scheme, fields=['contentType'])
+    composite_metadata.contentType = first_src_metadata.contentType
+
+    preconditions = PreconditionsFromHeaders(self.headers or {})
+
+    self.logger.info(
+        'Composing %s from %d component objects.', dst_url,
+        len(source_entries))
+    self.gsutil_api.ComposeObject(source_entries, composite_metadata,
+                                  preconditions=preconditions,
+                                  provider=dst_url.scheme)
« no previous file with comments | « third_party/gsutil/gslib/commands/cat.py ('k') | third_party/gsutil/gslib/commands/config.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698