OLD | NEW |
(Empty) | |
| 1 # Copyright 2012 Google Inc. |
| 2 #coding=utf8 |
| 3 # |
| 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 # you may not use this file except in compliance with the License. |
| 6 # You may obtain a copy of the License at |
| 7 # |
| 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 # |
| 10 # Unless required by applicable law or agreed to in writing, software |
| 11 # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 # See the License for the specific language governing permissions and |
| 14 # limitations under the License. |
| 15 |
| 16 import boto |
| 17 import csv |
| 18 import StringIO |
| 19 |
| 20 from boto.s3.key import Key |
| 21 from gslib.command import COMMAND_NAME |
| 22 from gslib.command import COMMAND_NAME_ALIASES |
| 23 from gslib.command import CONFIG_REQUIRED |
| 24 from gslib.command import Command |
| 25 from gslib.command import FILE_URIS_OK |
| 26 from gslib.command import MAX_ARGS |
| 27 from gslib.command import MIN_ARGS |
| 28 from gslib.command import PROVIDER_URIS_OK |
| 29 from gslib.command import SUPPORTED_SUB_ARGS |
| 30 from gslib.command import URIS_START_ARG |
| 31 from gslib.exception import CommandException |
| 32 from gslib.help_provider import HELP_NAME |
| 33 from gslib.help_provider import HELP_NAME_ALIASES |
| 34 from gslib.help_provider import HELP_ONE_LINE_SUMMARY |
| 35 from gslib.help_provider import HELP_TEXT |
| 36 from gslib.help_provider import HELP_TYPE |
| 37 from gslib.help_provider import HelpType |
| 38 from gslib.name_expansion import NameExpansionIterator |
| 39 from gslib.util import NO_MAX |
| 40 |
| 41 _detailed_help_text = (""" |
| 42 <B>SYNOPSIS</B> |
| 43 gsutil setmeta [-n] -h [header:value|header] ... uri... |
| 44 |
| 45 |
| 46 <B>DESCRIPTION</B> |
| 47 The gsutil setmeta command allows you to set or remove the metadata on one |
| 48 or more objects. It takes one or more header arguments followed by one or |
| 49 more URIs, where each header argument is in one of two forms: |
| 50 |
| 51 - if you specify header:value, it will set the given header on all |
| 52 named objects. |
| 53 |
| 54 - if you specify header (with no value), it will remove the given header |
| 55 from all named objects. |
| 56 |
| 57 For example, the following command would set the Content-Type and |
| 58 Cache-Control and remove the Content-Disposition on the specified objects: |
| 59 |
| 60 gsutil setmeta -h "Content-Type:text/html" -h "Cache-Control:public, max-age
=3600" -h "Content-Disposition" gs://bucket/*.html |
| 61 |
| 62 If you have a large number of objects to update you might want to use the |
| 63 gsutil -m option, to perform a parallel (multi-threaded/multi-processing) |
| 64 update: |
| 65 |
| 66 gsutil -m setmeta -h "Content-Type:text/html" -h "Cache-Control:public, max-
age=3600" -h "Content-Disposition" gs://bucket/*.html |
| 67 |
| 68 See "gsutil help metadata" for details about how you can set metadata |
| 69 while uploading objects, what metadata fields can be set and the meaning of |
| 70 these fields, use of custom metadata, and how to view currently set metadata. |
| 71 |
| 72 |
| 73 <B>OPERATION COST</B> |
| 74 This command uses four operations per URI (one to read the ACL, one to read |
| 75 the current metadata, one to set the new metadata, and one to set the ACL). |
| 76 |
| 77 For cases where you want all objects to have the same ACL you can avoid half |
| 78 these operations by setting a default ACL on the bucket(s) containing the |
| 79 named objects, and using the setmeta -n option. See "gsutil help setdefacl". |
| 80 |
| 81 |
| 82 <B>OPTIONS</B> |
| 83 -h Specifies a header:value to be added, or header to be removed, |
| 84 from each named object. |
| 85 -n Causes the operations for reading and writing the ACL to be |
| 86 skipped. This halves the number of operations performed per |
| 87 request, improving the speed and reducing the cost of performing |
| 88 the operations. This option makes sense for cases where you want |
| 89 all objects to have the same ACL, for which you have set a default |
| 90 ACL on the bucket(s) containing the objects. See "gsutil help |
| 91 setdefacl". |
| 92 |
| 93 |
| 94 <B>OLDER SYNTAX (DEPRECATED)</B> |
| 95 The first version of the setmeta command used more complicated syntax |
| 96 (described below). gsutil still supports this syntax, to avoid breaking |
| 97 existing customer uses, but it is now deprecated and will eventually |
| 98 be removed. |
| 99 |
| 100 With this older syntax, the setmeta command accepts a single metadata |
| 101 argument in one of two forms: |
| 102 |
| 103 gsutil setmeta [-n] header:value uri... |
| 104 |
| 105 or |
| 106 |
| 107 gsutil setmeta [-n] '"header:value","-header",...' uri... |
| 108 |
| 109 The first form allows you to specify a single header name and value to |
| 110 set. For example, the following command would set the Content-Type on the |
| 111 specified objects: |
| 112 |
| 113 gsutil setmeta "Content-Type:text/html" gs://bucket/*.html |
| 114 |
| 115 This form only works if the header name and value don't contain double |
| 116 quotes or commas, and only works for setting the header value (not for |
| 117 removing it). |
| 118 |
| 119 The more general form of the first argument allows both setting and removing |
| 120 multiple fields, without any of the content restrictions noted above. For |
| 121 this variant the first argument is a CSV-formatted list of headers to add |
| 122 or remove. Getting the CSV-formatted list to be passed correctly into gsutil |
| 123 requires different syntax on Linux or MacOS than it does on Windows. |
| 124 |
| 125 On Linux or MacOS you need to surround the entire argument in single quotes |
| 126 to avoid having the shell interpret/strip out the double-quotes in the CSV |
| 127 data. For example, the following command would set the Content-Type and |
| 128 Cache-Control and remove the Content-Disposition on the specified objects: |
| 129 |
| 130 gsutil setmeta '"Content-Type:text/html","Cache-Control:public, max-age=3600
","-Content-Disposition"' gs://bucket/*.html |
| 131 |
| 132 To pass CSV data on Windows you need two sets of double quotes around |
| 133 each header/value pair, and one set of double quotes around the entire |
| 134 expression. For example, the following command would set the Content-Type |
| 135 and Cache-Control and remove the Content-Disposition on the specified objects: |
| 136 |
| 137 gsutil setmeta "\""Content-Type:text/html"",""Cache-Control:public, max-age=
3600"",""-Content-Disposition""\" gs://bucket/*.html |
| 138 |
| 139 |
| 140 <B>WARNING ABOUT USING SETMETA WITH VERSIONING ENABLED</B> |
| 141 |
| 142 Note that if you use the gsutil setmeta command on an object in a bucket |
| 143 with versioning enabled (see 'gsutil help versioning'), it will create |
| 144 a new object version (and thus, you will get charged for the space required |
| 145 for holding the additional version). |
| 146 """) |
| 147 |
| 148 |
class SetMetaCommand(Command):
  """Implementation of gsutil setmeta command.

  Metadata changes are given either as repeated "-h header[:value]" options
  or, for backward compatibility, as a deprecated CSV metadata spec passed as
  the first positional argument.
  """

  # Command specification (processed by parent class).
  command_spec = {
    # Name of command.
    COMMAND_NAME : 'setmeta',
    # List of command name aliases.
    COMMAND_NAME_ALIASES : ['setheader'],
    # Min number of args required by this command.
    MIN_ARGS : 1,
    # Max number of args accepted by this command, or NO_MAX.
    MAX_ARGS : NO_MAX,
    # Getopt-style string specifying acceptable sub args.
    SUPPORTED_SUB_ARGS : 'h:n',
    # True if file URIs acceptable for this command.
    FILE_URIS_OK : False,
    # True if provider-only URIs acceptable for this command.
    PROVIDER_URIS_OK : False,
    # Index in args of first URI arg.
    URIS_START_ARG : 1,
    # True if must configure gsutil before running command.
    CONFIG_REQUIRED : True,
  }
  help_spec = {
    # Name of command or auxiliary help info for which this help applies.
    HELP_NAME : 'setmeta',
    # List of help name aliases.
    HELP_NAME_ALIASES : ['setheader'],
    # Type of help:
    HELP_TYPE : HelpType.COMMAND_HELP,
    # One line summary of this help.
    HELP_ONE_LINE_SUMMARY : 'Set metadata on already uploaded objects',
    # The full help text.
    HELP_TEXT : _detailed_help_text,
  }

  # Command entry point.
  def RunCommand(self):
    """Parses the metadata arguments and applies them to each named object.

    Returns:
      0 if metadata was set on every object.

    Raises:
      CommandException: if the metadata arguments are invalid, no URI was
          given, a single URI argument does not name an object, or the
          metadata of any object could not be set.
    """
    headers = []
    preserve_acl = True
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-n':
          preserve_acl = False
        elif o == '-h':
          headers.append(a)

    if headers:
      (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)
      uri_args = self.args
    else:
      # Deprecated syntax: the first positional argument is the metadata spec
      # and only the remaining arguments are URIs.
      (metadata_minus, metadata_plus) = self._ParseMetadataSpec(self.args[0])
      uri_args = self.args[1:]

    # With the deprecated syntax a lone spec argument satisfies MIN_ARGS but
    # leaves nothing to operate on; report that instead of silently doing
    # nothing and returning success.
    if not uri_args:
      raise CommandException('The setmeta command requires at least one URI.')

    if (len(uri_args) == 1
        and not self.suri_builder.StorageUri(uri_args[0]).names_object()):
      raise CommandException('URI (%s) must name an object' % uri_args[0])

    # Used to track if any objects' metadata failed to be set.
    self.everything_set_okay = True

    def _SetMetadataExceptionHandler(e):
      """Simple exception handler to allow post-completion status."""
      self.THREADED_LOGGER.error(str(e))
      self.everything_set_okay = False

    def _SetMetadataFunc(name_expansion_result):
      """Sets the parsed metadata on one expanded object URI."""
      exp_src_uri = self.suri_builder.StorageUri(
          name_expansion_result.GetExpandedUriStr())
      self.THREADED_LOGGER.info('Setting metadata on %s...' % exp_src_uri)
      exp_src_uri.set_metadata(metadata_plus, metadata_minus, preserve_acl)

    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.proj_id_handler, self.headers, self.debug,
        self.bucket_storage_uri_class, uri_args, self.recursion_requested,
        self.recursion_requested)

    # Perform requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(_SetMetadataFunc, name_expansion_iterator,
               _SetMetadataExceptionHandler)

    if not self.everything_set_okay:
      raise CommandException('Metadata for some objects could not be set.')

    return 0

  def _ParseMetadataHeaders(self, headers):
    """Parses the values of the -h options.

    Args:
      headers: List of strings, each either "header" (remove that header) or
          "header:value" (set that header). Only the first ':' separates the
          name from the value, so values may themselves contain ':'.

    Returns:
      Tuple (metadata_minus, metadata_plus): the set of lower-cased header
      names to remove and a dict mapping lower-cased header names to the
      values to set.

    Raises:
      CommandException: if a header name is empty, non-ASCII, duplicated or
          not allowed to be set.
    """
    metadata_minus = set()
    metadata_plus = {}
    # Every header name in argument order, used for the duplicate check.
    header_names = []

    for md_arg in headers:
      # Use partition rather than an unbounded split so that everything after
      # the first ':' is kept as the value (e.g. URLs or times).
      (header, _, value) = md_arg.partition(':')
      if not header:
        raise CommandException(
            'Invalid argument: must be either header or header:value (%s)' %
            md_arg)
      _InsistAsciiHeader(header)
      # Translate headers to lowercase to match the casing assumed by our
      # sanity-checking operations.
      header = header.lower()
      if value:
        if _IsCustomMeta(header):
          # Allow non-ASCII data for custom metadata fields. Don't unicode
          # encode other fields because that would perturb their content
          # (e.g., adding %2F's into the middle of a Cache-Control value).
          value = unicode(value, 'utf-8')
        metadata_plus[header] = value
      else:
        # No value (or an empty one) means the header is to be removed.
        metadata_minus.add(header)
      header_names.append(header)

    self._ValidateHeaderNames(header_names)
    return (metadata_minus, metadata_plus)

  def _ParseMetadataSpec(self, spec):
    """Parses the deprecated CSV metadata spec and logs a deprecation warning.

    Args:
      spec: One CSV row whose fields are either "header:value" (set that
          header) or "-header" (remove that header).

    Returns:
      Tuple (metadata_minus, metadata_plus): the set of lower-cased header
      names to remove and a dict mapping lower-cased header names to the
      values to set.

    Raises:
      CommandException: if the spec or one of its fields is empty, a removal
          field contains ':', an added field has no value, or a header name
          is non-ASCII, duplicated or not allowed to be set.
    """
    self.THREADED_LOGGER.info('WARNING: metadata spec syntax (%s)\nis '
                              'deprecated and will eventually be removed.\n'
                              'Please see "gsutil help setmeta" for current '
                              'syntax' % spec)
    metadata_minus = set()
    metadata_plus = {}
    # Every header name in argument order, used for the duplicate check.
    header_names = []

    mdf = StringIO.StringIO(spec)
    try:
      md_args = csv.reader(mdf).next()
    except StopIteration:
      # An empty spec yields no CSV row at all.
      raise CommandException(
          'Invalid empty metadata specification component.')
    finally:
      mdf.close()

    for md_arg in md_args:
      if not md_arg:
        raise CommandException(
            'Invalid empty metadata specification component.')
      if md_arg[0] == '-':
        header = md_arg[1:]
        if header.find(':') != -1:
          raise CommandException('Removal spec may not contain ":" (%s).' %
                                 header)
        _InsistAsciiHeader(header)
        # Translate headers to lowercase to match the casing required by
        # uri.set_metadata().
        header = header.lower()
        metadata_minus.add(header)
      else:
        parts = md_arg.split(':', 1)
        if len(parts) != 2:
          raise CommandException(
              'Fields being added must include values (%s).' % md_arg)
        (header, value) = parts
        _InsistAsciiHeader(header)
        header = header.lower()
        if _IsCustomMeta(header):
          # Allow non-ASCII data for custom metadata fields. Don't unicode
          # encode other fields because that would perturb their content
          # (e.g., adding %2F's into the middle of a Cache-Control value).
          value = unicode(value, 'utf-8')
        metadata_plus[header] = value
      header_names.append(header)

    self._ValidateHeaderNames(header_names)
    return (metadata_minus, metadata_plus)

  def _ValidateHeaderNames(self, header_names):
    """Rejects duplicated or disallowed header names.

    Args:
      header_names: Lower-cased header names in the order they were given,
          covering both headers to set and headers to remove.

    Raises:
      CommandException: if a name appears more than once (including once to
          set and once to remove), or is neither a base user-settable field
          nor a custom metadata field.
    """
    if len(header_names) != len(set(header_names)):
      raise CommandException('Each header must appear at most once.')
    for f in header_names:
      # This check is overly simple; it would be stronger to check, for each
      # URI argument, whether f.startswith the
      # uri.get_provider().metadata_prefix, but here we just parse the spec
      # once, before processing any of the URIs. This means we will not
      # detect if the user tries to set an x-goog-meta- field on another
      # provider's object, for example.
      if f not in Key.base_user_settable_fields and not _IsCustomMeta(f):
        raise CommandException('Invalid or disallowed header (%s).\n'
                               'Only these fields (plus x-goog-meta-* fields)'
                               ' can be set or unset:\n%s' % (f,
                               sorted(list(Key.base_user_settable_fields))))

  # Test specification. See definition of test_steps in base class for
  # details on how to populate these fields.
  test_steps = [
    # (test name, cmd line, ret code, (result_file, expect_file))
    ('upload', 'gsutil -h "x-goog-meta-xyz:abc" '
     '-h "Content-Type:image/gif" cp $F1 gs://$B1/$O1', 0, None),
    ('setup gif CT', 'echo image/gif >test_gif.ct', 0, None),
    ('setup html CT', 'echo text/html >test_html.ct', 0, None),
    ('setup META', 'echo "abc" >test.meta', 0, None),
    ('retrieve initial metadata', 'gsutil ls -L gs://$B1/$O1 >$F1', 0, None),
    ('verify initial Content-Type', 'grep Content-Type $F1 | cut -f3 >$F2',
     0, ('$F2', 'test_gif.ct')),
    ('verify initial x-goog-meta-xyz',
     'grep x-goog-meta-xyz $F1 | cut -f3 > $F2', 0, ('$F2', 'test.meta')),
    ('run setmeta',
     'gsutil setmeta -n -h Content-Type:text/html -h x-goog-meta-xyz '
     'gs://$B1/$O1', 0, None),
    ('retrieve new metadata', 'gsutil ls -L gs://$B1/$O1 >$F1', 0, None),
    ('verify new Content-Type', 'grep Content-Type $F1 | cut -f3 >$F2',
     0, ('$F2', 'test_html.ct')),
    ('verify new x-goog-meta-xyz', 'grep -q xyz $F1', 1, None),
    # Test handling of various illegal setmeta commands.
    ('test missing header value',
     'gsutil setmeta \'"Content-Type"\' gs://$B1/$O1', 1, None),
    ('test value included with minus header',
     'gsutil setmeta \'"-Content-Type:text/html"\' gs://$B1/$O1', 1, None),
    ('test header included as both plus and minus header',
     'gsutil setmeta \'"Content-Type:text/html","-Content-Type"\' gs://$B1/$O1',
     1, None),
    ('test non-ASCII custom header',
     'gsutil setmeta \'"x-goog-meta-soufflé:5"\' gs://$B1/$O1', 1, None),
    ('test disallowed header',
     'gsutil setmeta \'"Content-Length:5"\' gs://$B1/$O1', 1, None),
    #
    # Older (deprecated) syntax tests:
    ('upload', 'gsutil -h "x-goog-meta-xyz:abc" '
     '-h "Content-Type:image/gif" cp $F1 gs://$B1/$O1', 0, None),
    ('setup gif CT', 'echo image/gif >test_gif.ct', 0, None),
    ('setup html CT', 'echo text/html >test_html.ct', 0, None),
    ('setup META', 'echo "abc" >test.meta', 0, None),
    ('retrieve initial metadata', 'gsutil ls -L gs://$B1/$O1 >$F1', 0, None),
    ('verify initial Content-Type', 'grep Content-Type $F1 | cut -f3 >$F2',
     0, ('$F2', 'test_gif.ct')),
    ('verify initial x-goog-meta-xyz',
     'grep x-goog-meta-xyz $F1 | cut -f3 > $F2', 0, ('$F2', 'test.meta')),
    ('run setmeta (deprecated syntax)',
     'gsutil setmeta -n \'"Content-Type:text/html","-x-goog-meta-xyz"\' '
     'gs://$B1/$O1', 0, None),
    ('retrieve new metadata', 'gsutil ls -L gs://$B1/$O1 >$F1', 0, None),
    ('verify new Content-Type', 'grep Content-Type $F1 | cut -f3 >$F2',
     0, ('$F2', 'test_html.ct')),
    ('verify new x-goog-meta-xyz', 'grep -q xyz $F1', 1, None),
    # Test handling of various illegal setmeta commands.
    ('test missing header value',
     'gsutil setmeta \'"Content-Type"\' gs://$B1/$O1', 1, None),
    ('test value included with minus header',
     'gsutil setmeta \'"-Content-Type:text/html"\' gs://$B1/$O1', 1, None),
    ('test header included as both plus and minus header',
     'gsutil setmeta \'"Content-Type:text/html","-Content-Type"\' gs://$B1/$O1',
     1, None),
    ('test non-ASCII custom header',
     'gsutil setmeta \'"x-goog-meta-soufflé:5"\' gs://$B1/$O1', 1, None),
    ('test disallowed header',
     'gsutil setmeta \'"Content-Length:5"\' gs://$B1/$O1', 1, None),
    #
    ('remove test files', 'rm -f test_gif.ct test_html.ct test.meta', 0, None),
  ]
| 456 |
| 457 |
| 458 def _InsistAsciiHeader(header): |
| 459 if not all(ord(c) < 128 for c in header): |
| 460 raise CommandException('Invalid non-ASCII header (%s).' % header) |
| 461 |
| 462 def _IsCustomMeta(header): |
| 463 return header.startswith('x-goog-meta-') or header.startswith('x-amz-meta-') |
OLD | NEW |