OLD | NEW |
(Empty) | |
| 1 # -*- coding: utf-8 -*- |
| 2 # Copyright 2012 Google Inc. All Rights Reserved. |
| 3 # |
| 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 # you may not use this file except in compliance with the License. |
| 6 # You may obtain a copy of the License at |
| 7 # |
| 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 # |
| 10 # Unless required by applicable law or agreed to in writing, software |
| 11 # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 # See the License for the specific language governing permissions and |
| 14 # limitations under the License. |
| 15 """Implementation of setmeta command for setting cloud object metadata.""" |
| 16 |
| 17 from __future__ import absolute_import |
| 18 |
| 19 from gslib.cloud_api import AccessDeniedException |
| 20 from gslib.cloud_api import PreconditionException |
| 21 from gslib.cloud_api import Preconditions |
| 22 from gslib.command import Command |
| 23 from gslib.command_argument import CommandArgument |
| 24 from gslib.cs_api_map import ApiSelector |
| 25 from gslib.exception import CommandException |
| 26 from gslib.name_expansion import NameExpansionIterator |
| 27 from gslib.storage_url import StorageUrlFromString |
| 28 from gslib.translation_helper import CopyObjectMetadata |
| 29 from gslib.translation_helper import ObjectMetadataFromHeaders |
| 30 from gslib.translation_helper import PreconditionsFromHeaders |
| 31 from gslib.util import GetCloudApiInstance |
| 32 from gslib.util import NO_MAX |
| 33 from gslib.util import Retry |
| 34 |
| 35 |
| 36 _SYNOPSIS = """ |
| 37 gsutil setmeta -h [header:value|header] ... url... |
| 38 """ |
| 39 |
| 40 _DETAILED_HELP_TEXT = (""" |
| 41 <B>SYNOPSIS</B> |
| 42 """ + _SYNOPSIS + """ |
| 43 |
| 44 |
| 45 <B>DESCRIPTION</B> |
| 46 The gsutil setmeta command allows you to set or remove the metadata on one |
| 47 or more objects. It takes one or more header arguments followed by one or |
| 48 more URLs, where each header argument is in one of two forms: |
| 49 |
| 50 - if you specify header:value, it will set the given header on all |
| 51 named objects. |
| 52 |
| 53 - if you specify header (with no value), it will remove the given header |
| 54 from all named objects. |
| 55 |
| 56 For example, the following command would set the Content-Type and |
| 57 Cache-Control and remove the Content-Disposition on the specified objects: |
| 58 |
| 59 gsutil setmeta -h "Content-Type:text/html" \\ |
| 60 -h "Cache-Control:public, max-age=3600" \\ |
| 61 -h "Content-Disposition" gs://bucket/*.html |
| 62 |
| 63 If you have a large number of objects to update you might want to use the |
| 64 gsutil -m option, to perform a parallel (multi-threaded/multi-processing) |
| 65 update: |
| 66 |
| 67 gsutil -m setmeta -h "Content-Type:text/html" \\ |
| 68 -h "Cache-Control:public, max-age=3600" \\ |
| 69 -h "Content-Disposition" gs://bucket/*.html |
| 70 |
| 71 You can also use the setmeta command to set custom metadata on an object: |
| 72 |
| 73 gsutil setmeta -h "x-goog-meta-icecreamflavor:vanilla" gs://bucket/object |
| 74 |
| 75 See "gsutil help metadata" for details about how you can set metadata |
| 76 while uploading objects, what metadata fields can be set and the meaning of |
| 77 these fields, use of custom metadata, and how to view currently set metadata. |
| 78 |
| 79 NOTE: By default, publicly readable objects are served with a Cache-Control |
| 80 header allowing such objects to be cached for 3600 seconds. For more details |
| 81 about this default behavior see the CACHE-CONTROL section of |
| 82 "gsutil help metadata". If you need to ensure that updates become visible |
| 83 immediately, you should set a Cache-Control header of "Cache-Control:private, |
| 84 max-age=0, no-transform" on such objects. You can do this with the command: |
| 85 |
| 86 gsutil setmeta -h "Content-Type:text/html" \\ |
| 87 -h "Cache-Control:private, max-age=0, no-transform" gs://bucket/*.html |
| 88 |
| 89 The setmeta command reads each object's current generation and metageneration |
| 90 and uses those as preconditions unless they are otherwise specified by |
| 91 top-level arguments. For example: |
| 92 |
| 93 gsutil -h "x-goog-if-metageneration-match:2" setmeta |
| 94 -h "x-goog-meta-icecreamflavor:vanilla" |
| 95 |
| 96 will set the icecreamflavor:vanilla metadata if the current live object has a |
| 97 metageneration of 2. |
| 98 |
| 99 <B>OPTIONS</B> |
| 100 -h Specifies a header:value to be added, or header to be removed, |
| 101 from each named object. |
| 102 """) |
| 103 |
# The setmeta command models metadata as HTTP-style headers, which does not map
# directly onto the JSON API's object resource. The fields below are the
# non-custom headers gsutil3 supported when gsutil4 was released; any other
# non-custom header is rejected by the argument parser.
SETTABLE_FIELDS = [
    'cache-control',
    'content-disposition',
    'content-encoding',
    'content-language',
    'content-md5',
    'content-type',
]
| 110 |
| 111 |
| 112 def _SetMetadataExceptionHandler(cls, e): |
| 113 """Exception handler that maintains state about post-completion status.""" |
| 114 cls.logger.error(e) |
| 115 cls.everything_set_okay = False |
| 116 |
| 117 |
| 118 def _SetMetadataFuncWrapper(cls, name_expansion_result, thread_state=None): |
| 119 cls.SetMetadataFunc(name_expansion_result, thread_state=thread_state) |
| 120 |
| 121 |
class SetMetaCommand(Command):
  """Implementation of gsutil setmeta command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'setmeta',
      command_name_aliases=['setheader'],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=NO_MAX,
      supported_sub_args='h:rR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='setmeta',
      help_name_aliases=['setheader'],
      help_type='command_help',
      help_one_line_summary='Set metadata on already uploaded objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the setmeta command.

    Collects the -h sub-options, validates them, and applies the resulting
    metadata change to every object matched by the URL arguments.

    Returns:
      0 if metadata was set on every matched object.

    Raises:
      CommandException: if a canned-ACL header is supplied, if a header
          argument is invalid, if a single non-recursive URL does not name an
          object, or if metadata could not be set on one or more objects.
    """
    headers = []
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-h':
          if 'x-goog-acl' in a or 'x-amz-acl' in a:
            raise CommandException(
                'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
                'set ... to set canned ACLs.')
          headers.append(a)

    (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)

    # A removal is expressed as setting the header to the empty string.
    self.metadata_change = metadata_plus
    for header in metadata_minus:
      self.metadata_change[header] = ''

    if len(self.args) == 1 and not self.recursion_requested:
      url = StorageUrlFromString(self.args[0])
      if not (url.IsCloudUrl() and url.IsObject()):
        raise CommandException('URL (%s) must name an object' % self.args[0])

    # Used to track if any objects' metadata failed to be set.
    self.everything_set_okay = True

    self.preconditions = PreconditionsFromHeaders(self.headers)

    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api,
        self.args, self.recursion_requested, all_versions=self.all_versions,
        continue_on_error=self.parallel_operations)

    try:
      # Perform requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_SetMetadataFuncWrapper, name_expansion_iterator,
                 _SetMetadataExceptionHandler, fail_on_error=True)
    except AccessDeniedException as e:
      if e.status == 403:
        self._WarnServiceAccounts()
      raise

    if not self.everything_set_okay:
      raise CommandException('Metadata for some objects could not be set.')

    return 0

  @Retry(PreconditionException, tries=3, timeout_secs=1)
  def SetMetadataFunc(self, name_expansion_result, thread_state=None):
    """Sets metadata on an object.

    Fetches the object's current generation and metageneration, uses them as
    preconditions for any precondition not already present in
    self.preconditions, and patches the object with self.metadata_change.

    Args:
      name_expansion_result: NameExpansionResult describing target object.
      thread_state: gsutil Cloud API instance to use for the operation.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    exp_src_url = name_expansion_result.expanded_storage_url
    self.logger.info('Setting metadata on %s...', exp_src_url)

    fields = ['generation', 'metadata', 'metageneration']
    cloud_obj_metadata = gsutil_api.GetObjectMetadata(
        exp_src_url.bucket_name, exp_src_url.object_name,
        generation=exp_src_url.generation, provider=exp_src_url.scheme,
        fields=fields)

    # Build a per-call Preconditions object so the shared self.preconditions
    # is never mutated by the defaults filled in below.
    preconditions = Preconditions(
        gen_match=self.preconditions.gen_match,
        meta_gen_match=self.preconditions.meta_gen_match)
    if preconditions.gen_match is None:
      preconditions.gen_match = cloud_obj_metadata.generation
    if preconditions.meta_gen_match is None:
      preconditions.meta_gen_match = cloud_obj_metadata.metageneration

    # Patch handles the patch semantics for most metadata, but we need to
    # merge the custom metadata field manually.
    patch_obj_metadata = ObjectMetadataFromHeaders(self.metadata_change)

    api = gsutil_api.GetApiSelector(provider=exp_src_url.scheme)
    # For XML we only want to patch through custom metadata that has
    # changed, so the patch object is sent as built. For JSON we need to
    # build the complete set by merging into the metadata just fetched.
    if api == ApiSelector.JSON:
      CopyObjectMetadata(patch_obj_metadata, cloud_obj_metadata,
                         override=True)
      patch_obj_metadata = cloud_obj_metadata
      # Patch body does not need the object generation and metageneration.
      patch_obj_metadata.generation = None
      patch_obj_metadata.metageneration = None

    gsutil_api.PatchObjectMetadata(
        exp_src_url.bucket_name, exp_src_url.object_name, patch_obj_metadata,
        generation=exp_src_url.generation, preconditions=preconditions,
        provider=exp_src_url.scheme)

  def _ParseMetadataHeaders(self, headers):
    """Validates and parses metadata changes from the headers argument.

    Args:
      headers: List of 'header:value' (set) or 'header' (remove) strings.

    Returns:
      (metadata_minus, metadata_plus): Tuple of the set of lowercased header
      names to remove and the dict of lowercased header name -> value to set.
      Custom (x-goog-meta-* / x-amz-meta-*) headers are included in both.

    Raises:
      CommandException: if a header name or a non-custom header value is not
          ASCII, if a header is specified more than once, or if a non-custom
          header is not one of SETTABLE_FIELDS.
    """
    metadata_minus = set()
    cust_metadata_minus = set()
    metadata_plus = {}
    cust_metadata_plus = {}
    # Build a count of the keys encountered from each plus and minus arg so we
    # can check for dupe field specs.
    num_metadata_plus_elems = 0
    num_cust_metadata_plus_elems = 0
    num_metadata_minus_elems = 0
    num_cust_metadata_minus_elems = 0

    for md_arg in headers:
      # Use partition rather than split: everything after the first ':' is
      # part of the value, so values that themselves contain ':' (URLs,
      # timestamps, ...) are accepted. A bare header yields an empty value.
      (header, _, value) = md_arg.partition(':')
      _InsistAsciiHeader(header)
      # Translate headers to lowercase to match the casing assumed by our
      # sanity-checking operations.
      header = header.lower()
      if value:
        if _IsCustomMeta(header):
          # Allow non-ASCII data for custom metadata fields.
          cust_metadata_plus[header] = value
          num_cust_metadata_plus_elems += 1
        else:
          # Don't unicode encode other fields because that would perturb their
          # content (e.g., adding %2F's into the middle of a Cache-Control
          # value).
          _InsistAsciiHeaderValue(header, value)
          value = str(value)
          metadata_plus[header] = value
          num_metadata_plus_elems += 1
      else:
        if _IsCustomMeta(header):
          cust_metadata_minus.add(header)
          num_cust_metadata_minus_elems += 1
        else:
          metadata_minus.add(header)
          num_metadata_minus_elems += 1

    # A count that differs from its container's size means a key was repeated.
    # A header may also not be both set and removed; this is checked for
    # standard and custom headers alike.
    if (num_metadata_plus_elems != len(metadata_plus)
        or num_cust_metadata_plus_elems != len(cust_metadata_plus)
        or num_metadata_minus_elems != len(metadata_minus)
        or num_cust_metadata_minus_elems != len(cust_metadata_minus)
        or metadata_minus.intersection(metadata_plus)
        or cust_metadata_minus.intersection(cust_metadata_plus)):
      raise CommandException('Each header must appear at most once.')
    other_than_base_fields = (set(metadata_plus.keys())
                              .difference(SETTABLE_FIELDS))
    other_than_base_fields.update(
        metadata_minus.difference(SETTABLE_FIELDS))
    for f in other_than_base_fields:
      # This check is overly simple; it would be stronger to check, for each
      # URL argument, whether f.startswith the
      # provider metadata_prefix, but here we just parse the spec
      # once, before processing any of the URLs. This means we will not
      # detect if the user tries to set an x-goog-meta- field on an another
      # provider's object, for example.
      if not _IsCustomMeta(f):
        raise CommandException(
            'Invalid or disallowed header (%s).\nOnly these fields (plus '
            'x-goog-meta-* fields) can be set or unset:\n%s' % (
                f, sorted(list(SETTABLE_FIELDS))))
    metadata_plus.update(cust_metadata_plus)
    metadata_minus.update(cust_metadata_minus)
    return (metadata_minus, metadata_plus)
| 330 |
| 331 |
| 332 def _InsistAscii(string, message): |
| 333 if not all(ord(c) < 128 for c in string): |
| 334 raise CommandException(message) |
| 335 |
| 336 |
def _InsistAsciiHeader(header):
  """Raises CommandException if the header name has a non-ASCII character.

  Args:
    header: Header name to validate.
  """
  error_text = 'Invalid non-ASCII header (%s).' % header
  _InsistAscii(header, error_text)
| 339 |
| 340 |
def _InsistAsciiHeaderValue(header, value):
  """Raises CommandException if the header value has a non-ASCII character.

  Args:
    header: Header name, used only in the error message.
    value: Header value to validate.
  """
  error_text = ('Invalid non-ASCII value (%s) was provided for header %s.'
                % (value, header))
  _InsistAscii(value, error_text)
| 345 |
| 346 |
| 347 def _IsCustomMeta(header): |
| 348 return header.startswith('x-goog-meta-') or header.startswith('x-amz-meta-') |
OLD | NEW |