Index: third_party/gsutil/gslib/commands/setmeta.py |
diff --git a/third_party/gsutil/gslib/commands/setmeta.py b/third_party/gsutil/gslib/commands/setmeta.py |
deleted file mode 100644 |
index 9c39d75befe8f519eb28a0abff26fe9d687523da..0000000000000000000000000000000000000000 |
--- a/third_party/gsutil/gslib/commands/setmeta.py |
+++ /dev/null |
@@ -1,420 +0,0 @@ |
-# Copyright 2012 Google Inc. All Rights Reserved. |
-#coding=utf8 |
-# |
-# Licensed under the Apache License, Version 2.0 (the "License"); |
-# you may not use this file except in compliance with the License. |
-# You may obtain a copy of the License at |
-# |
-# http://www.apache.org/licenses/LICENSE-2.0 |
-# |
-# Unless required by applicable law or agreed to in writing, software |
-# distributed under the License is distributed on an "AS IS" BASIS, |
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
-# See the License for the specific language governing permissions and |
-# limitations under the License. |
- |
-import boto |
-import csv |
-import random |
-import StringIO |
-import time |
- |
-from boto.exception import GSResponseError |
-from boto.s3.key import Key |
-from gslib.command import COMMAND_NAME |
-from gslib.command import COMMAND_NAME_ALIASES |
-from gslib.command import CONFIG_REQUIRED |
-from gslib.command import Command |
-from gslib.command import FILE_URIS_OK |
-from gslib.command import MAX_ARGS |
-from gslib.command import MIN_ARGS |
-from gslib.command import PROVIDER_URIS_OK |
-from gslib.command import SUPPORTED_SUB_ARGS |
-from gslib.command import URIS_START_ARG |
-from gslib.exception import CommandException |
-from gslib.help_provider import HELP_NAME |
-from gslib.help_provider import HELP_NAME_ALIASES |
-from gslib.help_provider import HELP_ONE_LINE_SUMMARY |
-from gslib.help_provider import HELP_TEXT |
-from gslib.help_provider import HELP_TYPE |
-from gslib.help_provider import HelpType |
-from gslib.name_expansion import NameExpansionIterator |
-from gslib.util import NO_MAX |
-from gslib.util import Retry |
- |
-_detailed_help_text = (""" |
-<B>SYNOPSIS</B> |
- gsutil setmeta [-n] -h [header:value|header] ... uri... |
- |
- |
-<B>DESCRIPTION</B> |
- The gsutil setmeta command allows you to set or remove the metadata on one |
- or more objects. It takes one or more header arguments followed by one or |
- more URIs, where each header argument is in one of two forms: |
- |
- - if you specify header:value, it will set the given header on all |
- named objects. |
- |
- - if you specify header (with no value), it will remove the given header |
- from all named objects. |
- |
- For example, the following command would set the Content-Type and |
- Cache-Control and remove the Content-Disposition on the specified objects: |
- |
- gsutil setmeta -h "Content-Type:text/html" \\ |
- -h "Cache-Control:public, max-age=3600" \\ |
- -h "Content-Disposition" gs://bucket/*.html |
- |
- If you have a large number of objects to update you might want to use the |
- gsutil -m option, to perform a parallel (multi-threaded/multi-processing) |
- update: |
- |
- gsutil -m setmeta -h "Content-Type:text/html" \\ |
- -h "Cache-Control:public, max-age=3600" \\ |
- -h "Content-Disposition" gs://bucket/*.html |
- |
- See "gsutil help metadata" for details about how you can set metadata |
- while uploading objects, what metadata fields can be set and the meaning of |
- these fields, use of custom metadata, and how to view currently set metadata. |
- |
- |
-<B>OPERATION COST</B> |
- This command uses four operations per URI (one to read the ACL, one to read |
- the current metadata, one to set the new metadata, and one to set the ACL). |
- |
- For cases where you want all objects to have the same ACL you can avoid half |
- these operations by setting a default ACL on the bucket(s) containing the |
- named objects, and using the setmeta -n option. See "help gsutil setdefacl". |
- |
- |
-<B>OPTIONS</B> |
- -h Specifies a header:value to be added, or header to be removed, |
- from each named object. |
- -n Causes the operations for reading and writing the ACL to be |
- skipped. This halves the number of operations performed per |
- request, improving the speed and reducing the cost of performing |
- the operations. This option makes sense for cases where you want |
- all objects to have the same ACL, for which you have set a default |
- ACL on the bucket(s) containing the objects. See "help gsutil |
- setdefacl". |
- |
- |
-<B>OLDER SYNTAX (DEPRECATED)</B> |
- The first version of the setmeta command used more complicated syntax |
- (described below). gsutil still supports this syntax, to avoid breaking |
- existing customer uses, but it is now deprecated and will eventually |
- be removed. |
- |
- With this older syntax, the setmeta command accepts a single metadata |
- argument in one of two forms: |
- |
- gsutil setmeta [-n] header:value uri... |
- |
- or |
- |
- gsutil setmeta [-n] '"header:value","-header",...' uri... |
- |
- The first form allows you to specify a single header name and value to |
- set. For example, the following command would set the Content-Type and |
- Cache-Control and remove the Content-Disposition on the specified objects: |
- |
- gsutil setmeta -h "Content-Type:text/html" \\ |
- -h "Cache-Control:public, max-age=3600" \\ |
- -h "Content-Disposition" gs://bucket/*.html |
- |
- This form only works if the header name and value don't contain double |
- quotes or commas, and only works for setting the header value (not for |
- removing it). |
- |
- The more general form of the first argument allows both setting and removing |
- multiple fields, without any of the content restrictions noted above. For |
- this variant the first argument is a CSV-formatted list of headers to add |
- or remove. Getting the CSV-formatted list to be passed correctly into gsutil |
- requires different syntax on Linux or MacOS than it does on Windows. |
- |
- On Linux or MacOS you need to surround the entire argument in single quotes |
- to avoid having the shell interpret/strip out the double-quotes in the CSV |
- data. For example, the following command would set the Content-Type and |
- Cache-Control and remove the Content-Disposition on the specified objects: |
- |
- gsutil setmeta '"Content-Type:text/html","Cache-Control:public, max-age=3600","-Content-Disposition"' gs://bucket/*.html |
- |
- To pass CSV data on Windows you need two sets of double quotes around |
- each header/value pair, and one set of double quotes around the entire |
- expression. For example, the following command would set the Content-Type |
- and Cache-Control and remove the Content-Disposition on the specified objects: |
- |
- gsutil setmeta "\""Content-Type:text/html"",""Cache-Control:public, max-age=3600"",""-Content-Disposition""\" gs://bucket/*.html |
- |
- |
-<B>WARNING ABOUT USING SETMETA WITH VERSIONING ENABLED</B> |
- |
-Note that if you use the gsutil setmeta command on an object in a bucket |
-with versioning enabled (see 'gsutil help versioning'), it will create |
-a new object version (and thus, you will get charged for the space required |
-for holding the additional version). |
-""") |
- |
- |
-class SetMetaCommand(Command): |
- """Implementation of gsutil setmeta command.""" |
- |
- # Command specification (processed by parent class). |
- command_spec = { |
- # Name of command. |
- COMMAND_NAME : 'setmeta', |
- # List of command name aliases. |
- COMMAND_NAME_ALIASES : ['setheader'], |
- # Min number of args required by this command. |
- MIN_ARGS : 1, |
- # Max number of args required by this command, or NO_MAX. |
- MAX_ARGS : NO_MAX, |
- # Getopt-style string specifying acceptable sub args. |
- SUPPORTED_SUB_ARGS : 'h:n', |
- # True if file URIs acceptable for this command. |
- FILE_URIS_OK : False, |
- # True if provider-only URIs acceptable for this command. |
- PROVIDER_URIS_OK : False, |
- # Index in args of first URI arg. |
- URIS_START_ARG : 1, |
- # True if must configure gsutil before running command. |
- CONFIG_REQUIRED : True, |
- } |
- help_spec = { |
- # Name of command or auxiliary help info for which this help applies. |
- HELP_NAME : 'setmeta', |
- # List of help name aliases. |
- HELP_NAME_ALIASES : ['setheader'], |
- # Type of help: |
- HELP_TYPE : HelpType.COMMAND_HELP, |
- # One line summary of this help. |
- HELP_ONE_LINE_SUMMARY : 'Set metadata on already uploaded objects', |
- # The full help text. |
- HELP_TEXT : _detailed_help_text, |
- } |
- |
- # Command entry point. |
- def RunCommand(self): |
- headers = [] |
- preserve_acl = True |
- if self.sub_opts: |
- for o, a in self.sub_opts: |
- if o == '-n': |
- preserve_acl = False |
- elif o == '-h': |
- headers.append(a) |
- |
- if headers: |
- (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers) |
- uri_args = self.args |
- else: |
- (metadata_minus, metadata_plus) = self._ParseMetadataSpec(self.args[0]) |
- uri_args = self.args[1:] |
- |
- if (len(uri_args) == 1 |
- and not self.suri_builder.StorageUri(uri_args[0]).names_object()): |
- raise CommandException('URI (%s) must name an object' % uri_args[0]) |
- |
- # Used to track if any objects' metadata failed to be set. |
- self.everything_set_okay = True |
- |
- def _SetMetadataExceptionHandler(e): |
- """Simple exception handler to allow post-completion status.""" |
- self.THREADED_LOGGER.error(str(e)) |
- self.everything_set_okay = False |
- |
- @Retry(GSResponseError, tries=3, delay=1, backoff=2) |
- def _SetMetadataFunc(name_expansion_result): |
- exp_src_uri = self.suri_builder.StorageUri( |
- name_expansion_result.GetExpandedUriStr()) |
- self.THREADED_LOGGER.info('Setting metadata on %s...', exp_src_uri) |
- |
- key = exp_src_uri.get_key() |
- meta_generation = key.meta_generation |
- generation = key.generation |
- |
- headers = {} |
- if generation: |
- headers['x-goog-if-generation-match'] = generation |
- if meta_generation: |
- headers['x-goog-if-metageneration-match'] = meta_generation |
- |
- # If this fails because of a precondition, it will raise a |
- # GSResponseError for @Retry to handle. |
- exp_src_uri.set_metadata(metadata_plus, metadata_minus, preserve_acl, |
- headers=headers) |
- |
- name_expansion_iterator = NameExpansionIterator( |
- self.command_name, self.proj_id_handler, self.headers, self.debug, |
- self.bucket_storage_uri_class, uri_args, self.recursion_requested, |
- self.recursion_requested) |
- |
- # Perform requests in parallel (-m) mode, if requested, using |
- # configured number of parallel processes and threads. Otherwise, |
- # perform requests with sequential function calls in current process. |
- self.Apply(_SetMetadataFunc, name_expansion_iterator, |
- _SetMetadataExceptionHandler) |
- |
- if not self.everything_set_okay: |
- raise CommandException('Metadata for some objects could not be set.') |
- |
- return 0 |
- |
- def _ParseMetadataHeaders(self, headers): |
- metadata_minus = set() |
- cust_metadata_minus = set() |
- metadata_plus = {} |
- cust_metadata_plus = {} |
- # Build a count of the keys encountered from each plus and minus arg so we |
- # can check for dupe field specs. |
- num_metadata_plus_elems = 0 |
- num_cust_metadata_plus_elems = 0 |
- num_metadata_minus_elems = 0 |
- num_cust_metadata_minus_elems = 0 |
- |
- for md_arg in headers: |
- parts = md_arg.split(':') |
- if len(parts) not in (1, 2): |
- raise CommandException( |
- 'Invalid argument: must be either header or header:value (%s)' % |
- md_arg) |
- if len(parts) == 2: |
- (header, value) = parts |
- else: |
- (header, value) = (parts[0], None) |
- _InsistAsciiHeader(header) |
- # Translate headers to lowercase to match the casing assumed by our |
- # sanity-checking operations. |
- header = header.lower() |
- if value: |
- if _IsCustomMeta(header): |
- # Allow non-ASCII data for custom metadata fields. Don't unicode |
- # encode other fields because that would perturb their content |
- # (e.g., adding %2F's into the middle of a Cache-Control value). |
- value = unicode(value, 'utf-8') |
- cust_metadata_plus[header] = value |
- num_cust_metadata_plus_elems += 1 |
- else: |
- metadata_plus[header] = value |
- num_metadata_plus_elems += 1 |
- else: |
- if _IsCustomMeta(header): |
- cust_metadata_minus.add(header) |
- num_cust_metadata_minus_elems += 1 |
- else: |
- metadata_minus.add(header) |
- num_metadata_minus_elems += 1 |
- if (num_metadata_plus_elems != len(metadata_plus) |
- or num_cust_metadata_plus_elems != len(cust_metadata_plus) |
- or num_metadata_minus_elems != len(metadata_minus) |
- or num_cust_metadata_minus_elems != len(cust_metadata_minus) |
- or metadata_minus.intersection(set(metadata_plus.keys()))): |
- raise CommandException('Each header must appear at most once.') |
- other_than_base_fields = (set(metadata_plus.keys()) |
- .difference(Key.base_user_settable_fields)) |
- other_than_base_fields.update( |
- metadata_minus.difference(Key.base_user_settable_fields)) |
- for f in other_than_base_fields: |
- # This check is overly simple; it would be stronger to check, for each |
- # URI argument, whether f.startswith the |
- # uri.get_provider().metadata_prefix, but here we just parse the spec |
- # once, before processing any of the URIs. This means we will not |
- # detect if the user tries to set an x-goog-meta- field on an another |
- # provider's object, for example. |
- if not _IsCustomMeta(f): |
- raise CommandException('Invalid or disallowed header (%s).\n' |
- 'Only these fields (plus x-goog-meta-* fields)' |
- ' can be set or unset:\n%s' % (f, |
- sorted(list(Key.base_user_settable_fields)))) |
- metadata_plus.update(cust_metadata_plus) |
- metadata_minus.update(cust_metadata_minus) |
- return (metadata_minus, metadata_plus) |
- |
- def _ParseMetadataSpec(self, spec): |
- self.THREADED_LOGGER.info('WARNING: metadata spec syntax (%s)\nis ' |
- 'deprecated and will eventually be removed.\n' |
- 'Please see "gsutil help setmeta" for current ' |
- 'syntax' % spec) |
- metadata_minus = set() |
- cust_metadata_minus = set() |
- metadata_plus = {} |
- cust_metadata_plus = {} |
- # Build a count of the keys encountered from each plus and minus arg so we |
- # can check for dupe field specs. |
- num_metadata_plus_elems = 0 |
- num_cust_metadata_plus_elems = 0 |
- num_metadata_minus_elems = 0 |
- num_cust_metadata_minus_elems = 0 |
- |
- mdf = StringIO.StringIO(spec) |
- for md_arg in csv.reader(mdf).next(): |
- if not md_arg: |
- raise CommandException( |
- 'Invalid empty metadata specification component.') |
- if md_arg[0] == '-': |
- header = md_arg[1:] |
- if header.find(':') != -1: |
- raise CommandException('Removal spec may not contain ":" (%s).' % |
- header) |
- _InsistAsciiHeader(header) |
- # Translate headers to lowercase to match the casing required by |
- # uri.set_metadata(). |
- header = header.lower() |
- if _IsCustomMeta(header): |
- cust_metadata_minus.add(header) |
- num_cust_metadata_minus_elems += 1 |
- else: |
- metadata_minus.add(header) |
- num_metadata_minus_elems += 1 |
- else: |
- parts = md_arg.split(':', 1) |
- if len(parts) != 2: |
- raise CommandException( |
- 'Fields being added must include values (%s).' % md_arg) |
- (header, value) = parts |
- _InsistAsciiHeader(header) |
- header = header.lower() |
- if _IsCustomMeta(header): |
- # Allow non-ASCII data for custom metadata fields. Don't unicode |
- # encode other fields because that would perturb their content |
- # (e.g., adding %2F's into the middle of a Cache-Control value). |
- value = unicode(value, 'utf-8') |
- cust_metadata_plus[header] = value |
- num_cust_metadata_plus_elems += 1 |
- else: |
- metadata_plus[header] = value |
- num_metadata_plus_elems += 1 |
- mdf.close() |
- if (num_metadata_plus_elems != len(metadata_plus) |
- or num_cust_metadata_plus_elems != len(cust_metadata_plus) |
- or num_metadata_minus_elems != len(metadata_minus) |
- or num_cust_metadata_minus_elems != len(cust_metadata_minus) |
- or metadata_minus.intersection(set(metadata_plus.keys()))): |
- raise CommandException('Each header must appear at most once.') |
- other_than_base_fields = (set(metadata_plus.keys()) |
- .difference(Key.base_user_settable_fields)) |
- other_than_base_fields.update( |
- metadata_minus.difference(Key.base_user_settable_fields)) |
- for f in other_than_base_fields: |
- # This check is overly simple; it would be stronger to check, for each |
- # URI argument, whether f.startswith the |
- # uri.get_provider().metadata_prefix, but here we just parse the spec |
- # once, before processing any of the URIs. This means we will not |
- # detect if the user tries to set an x-goog-meta- field on an another |
- # provider's object, for example. |
- if not _IsCustomMeta(f): |
- raise CommandException('Invalid or disallowed header (%s).\n' |
- 'Only these fields (plus x-goog-meta-* fields)' |
- ' can be set or unset:\n%s' % (f, |
- sorted(list(Key.base_user_settable_fields)))) |
- metadata_plus.update(cust_metadata_plus) |
- metadata_minus.update(cust_metadata_minus) |
- return (metadata_minus, metadata_plus) |
- |
- |
-def _InsistAsciiHeader(header): |
- if not all(ord(c) < 128 for c in header): |
- raise CommandException('Invalid non-ASCII header (%s).' % header) |
- |
-def _IsCustomMeta(header): |
- return header.startswith('x-goog-meta-') or header.startswith('x-amz-meta-') |