OLD | NEW |
| (Empty) |
1 # Copyright 2012 Google Inc. All Rights Reserved. | |
2 #coding=utf8 | |
3 # | |
4 # Licensed under the Apache License, Version 2.0 (the "License"); | |
5 # you may not use this file except in compliance with the License. | |
6 # You may obtain a copy of the License at | |
7 # | |
8 # http://www.apache.org/licenses/LICENSE-2.0 | |
9 # | |
10 # Unless required by applicable law or agreed to in writing, software | |
11 # distributed under the License is distributed on an "AS IS" BASIS, | |
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 # See the License for the specific language governing permissions and | |
14 # limitations under the License. | |
15 | |
16 import boto | |
17 import csv | |
18 import random | |
19 import StringIO | |
20 import time | |
21 | |
22 from boto.exception import GSResponseError | |
23 from boto.s3.key import Key | |
24 from gslib.command import COMMAND_NAME | |
25 from gslib.command import COMMAND_NAME_ALIASES | |
26 from gslib.command import CONFIG_REQUIRED | |
27 from gslib.command import Command | |
28 from gslib.command import FILE_URIS_OK | |
29 from gslib.command import MAX_ARGS | |
30 from gslib.command import MIN_ARGS | |
31 from gslib.command import PROVIDER_URIS_OK | |
32 from gslib.command import SUPPORTED_SUB_ARGS | |
33 from gslib.command import URIS_START_ARG | |
34 from gslib.exception import CommandException | |
35 from gslib.help_provider import HELP_NAME | |
36 from gslib.help_provider import HELP_NAME_ALIASES | |
37 from gslib.help_provider import HELP_ONE_LINE_SUMMARY | |
38 from gslib.help_provider import HELP_TEXT | |
39 from gslib.help_provider import HELP_TYPE | |
40 from gslib.help_provider import HelpType | |
41 from gslib.name_expansion import NameExpansionIterator | |
42 from gslib.util import NO_MAX | |
43 from gslib.util import Retry | |
44 | |
45 _detailed_help_text = (""" | |
46 <B>SYNOPSIS</B> | |
47 gsutil setmeta [-n] -h [header:value|header] ... uri... | |
48 | |
49 | |
50 <B>DESCRIPTION</B> | |
51 The gsutil setmeta command allows you to set or remove the metadata on one | |
52 or more objects. It takes one or more header arguments followed by one or | |
53 more URIs, where each header argument is in one of two forms: | |
54 | |
55 - if you specify header:value, it will set the given header on all | |
56 named objects. | |
57 | |
58 - if you specify header (with no value), it will remove the given header | |
59 from all named objects. | |
60 | |
61 For example, the following command would set the Content-Type and | |
62 Cache-Control and remove the Content-Disposition on the specified objects: | |
63 | |
64 gsutil setmeta -h "Content-Type:text/html" \\ | |
65 -h "Cache-Control:public, max-age=3600" \\ | |
66 -h "Content-Disposition" gs://bucket/*.html | |
67 | |
68 If you have a large number of objects to update you might want to use the | |
69 gsutil -m option, to perform a parallel (multi-threaded/multi-processing) | |
70 update: | |
71 | |
72 gsutil -m setmeta -h "Content-Type:text/html" \\ | |
73 -h "Cache-Control:public, max-age=3600" \\ | |
74 -h "Content-Disposition" gs://bucket/*.html | |
75 | |
76 See "gsutil help metadata" for details about how you can set metadata | |
77 while uploading objects, what metadata fields can be set and the meaning of | |
78 these fields, use of custom metadata, and how to view currently set metadata. | |
79 | |
80 | |
81 <B>OPERATION COST</B> | |
82 This command uses four operations per URI (one to read the ACL, one to read | |
83 the current metadata, one to set the new metadata, and one to set the ACL). | |
84 | |
85 For cases where you want all objects to have the same ACL you can avoid half | |
86 these operations by setting a default ACL on the bucket(s) containing the | |
87 named objects, and using the setmeta -n option. See "help gsutil setdefacl". | |
88 | |
89 | |
90 <B>OPTIONS</B> | |
91 -h Specifies a header:value to be added, or header to be removed, | |
92 from each named object. | |
93 -n Causes the operations for reading and writing the ACL to be | |
94 skipped. This halves the number of operations performed per | |
95 request, improving the speed and reducing the cost of performing | |
96 the operations. This option makes sense for cases where you want | |
97 all objects to have the same ACL, for which you have set a default | |
98 ACL on the bucket(s) containing the objects. See "help gsutil | |
99 setdefacl". | |
100 | |
101 | |
102 <B>OLDER SYNTAX (DEPRECATED)</B> | |
103 The first version of the setmeta command used more complicated syntax | |
104 (described below). gsutil still supports this syntax, to avoid breaking | |
105 existing customer uses, but it is now deprecated and will eventually | |
106 be removed. | |
107 | |
108 With this older syntax, the setmeta command accepts a single metadata | |
109 argument in one of two forms: | |
110 | |
111 gsutil setmeta [-n] header:value uri... | |
112 | |
113 or | |
114 | |
115 gsutil setmeta [-n] '"header:value","-header",...' uri... | |
116 | |
117 The first form allows you to specify a single header name and value to | |
118 set. For example, the following command would set the Content-Type and | |
119 Cache-Control and remove the Content-Disposition on the specified objects: | |
120 | |
121 gsutil setmeta -h "Content-Type:text/html" \\ | |
122 -h "Cache-Control:public, max-age=3600" \\ | |
123 -h "Content-Disposition" gs://bucket/*.html | |
124 | |
125 This form only works if the header name and value don't contain double | |
126 quotes or commas, and only works for setting the header value (not for | |
127 removing it). | |
128 | |
129 The more general form of the first argument allows both setting and removing | |
130 multiple fields, without any of the content restrictions noted above. For | |
131 this variant the first argument is a CSV-formatted list of headers to add | |
132 or remove. Getting the CSV-formatted list to be passed correctly into gsutil | |
133 requires different syntax on Linux or MacOS than it does on Windows. | |
134 | |
135 On Linux or MacOS you need to surround the entire argument in single quotes | |
136 to avoid having the shell interpret/strip out the double-quotes in the CSV | |
137 data. For example, the following command would set the Content-Type and | |
138 Cache-Control and remove the Content-Disposition on the specified objects: | |
139 | |
140 gsutil setmeta '"Content-Type:text/html","Cache-Control:public, max-age=3600
","-Content-Disposition"' gs://bucket/*.html | |
141 | |
142 To pass CSV data on Windows you need two sets of double quotes around | |
143 each header/value pair, and one set of double quotes around the entire | |
144 expression. For example, the following command would set the Content-Type | |
145 and Cache-Control and remove the Content-Disposition on the specified objects: | |
146 | |
147 gsutil setmeta "\""Content-Type:text/html"",""Cache-Control:public, max-age=
3600"",""-Content-Disposition""\" gs://bucket/*.html | |
148 | |
149 | |
150 <B>WARNING ABOUT USING SETMETA WITH VERSIONING ENABLED</B> | |
151 | |
152 Note that if you use the gsutil setmeta command on an object in a bucket | |
153 with versioning enabled (see 'gsutil help versioning'), it will create | |
154 a new object version (and thus, you will get charged for the space required | |
155 for holding the additional version). | |
156 """) | |
157 | |
158 | |
class SetMetaCommand(Command):
  """Implementation of gsutil setmeta command.

  Sets and/or removes metadata headers on the objects named by the URI
  arguments. The headers come either from repeated -h options (current
  syntax) or from a single CSV-formatted spec passed as the first argument
  (deprecated syntax).
  """

  # Command specification (processed by parent class).
  command_spec = {
    # Name of command.
    COMMAND_NAME : 'setmeta',
    # List of command name aliases.
    COMMAND_NAME_ALIASES : ['setheader'],
    # Min number of args required by this command.
    MIN_ARGS : 1,
    # Max number of args required by this command, or NO_MAX.
    MAX_ARGS : NO_MAX,
    # Getopt-style string specifying acceptable sub args.
    SUPPORTED_SUB_ARGS : 'h:n',
    # True if file URIs acceptable for this command.
    FILE_URIS_OK : False,
    # True if provider-only URIs acceptable for this command.
    PROVIDER_URIS_OK : False,
    # Index in args of first URI arg.
    URIS_START_ARG : 1,
    # True if must configure gsutil before running command.
    CONFIG_REQUIRED : True,
  }
  help_spec = {
    # Name of command or auxiliary help info for which this help applies.
    HELP_NAME : 'setmeta',
    # List of help name aliases.
    HELP_NAME_ALIASES : ['setheader'],
    # Type of help:
    HELP_TYPE : HelpType.COMMAND_HELP,
    # One line summary of this help.
    HELP_ONE_LINE_SUMMARY : 'Set metadata on already uploaded objects',
    # The full help text.
    HELP_TEXT : _detailed_help_text,
  }

  # Command entry point.
  def RunCommand(self):
    """Parses the metadata arguments and applies them to every named object.

    Returns:
      0 on success.

    Raises:
      CommandException: if the metadata arguments are invalid, no URI was
          given, a single URI does not name an object, or the metadata of at
          least one object could not be set.
    """
    headers = []
    preserve_acl = True
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-n':
          preserve_acl = False
        elif o == '-h':
          headers.append(a)

    if headers:
      (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)
      uri_args = self.args
    else:
      # Deprecated syntax: the first arg is the metadata spec, the rest are
      # the URIs.
      (metadata_minus, metadata_plus) = self._ParseMetadataSpec(self.args[0])
      uri_args = self.args[1:]

    # Only reachable with the deprecated syntax, where the single required
    # arg has been consumed as the metadata spec.
    if not uri_args:
      raise CommandException('The setmeta command requires at least one URI.')

    if (len(uri_args) == 1
        and not self.suri_builder.StorageUri(uri_args[0]).names_object()):
      raise CommandException('URI (%s) must name an object' % uri_args[0])

    # Used to track if any objects' metadata failed to be set.
    self.everything_set_okay = True

    def _SetMetadataExceptionHandler(e):
      """Simple exception handler to allow post-completion status."""
      self.THREADED_LOGGER.error(str(e))
      self.everything_set_okay = False

    @Retry(GSResponseError, tries=3, delay=1, backoff=2)
    def _SetMetadataFunc(name_expansion_result):
      """Sets the parsed metadata on one expanded object URI."""
      exp_src_uri = self.suri_builder.StorageUri(
          name_expansion_result.GetExpandedUriStr())
      self.THREADED_LOGGER.info('Setting metadata on %s...', exp_src_uri)

      # The key is re-read on every attempt, so a retry sends preconditions
      # based on the generation values current at that time.
      key = exp_src_uri.get_key()
      meta_generation = key.meta_generation
      generation = key.generation

      precondition_headers = {}
      if generation:
        precondition_headers['x-goog-if-generation-match'] = generation
      if meta_generation:
        precondition_headers['x-goog-if-metageneration-match'] = (
            meta_generation)

      # If this fails because of a precondition, it will raise a
      # GSResponseError for @Retry to handle.
      exp_src_uri.set_metadata(metadata_plus, metadata_minus, preserve_acl,
                               headers=precondition_headers)

    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.proj_id_handler, self.headers, self.debug,
        self.bucket_storage_uri_class, uri_args, self.recursion_requested,
        self.recursion_requested)

    # Perform requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(_SetMetadataFunc, name_expansion_iterator,
               _SetMetadataExceptionHandler)

    if not self.everything_set_okay:
      raise CommandException('Metadata for some objects could not be set.')

    return 0

  def _ParseMetadataHeaders(self, headers):
    """Parses the values of the -h options (current syntax).

    Args:
      headers: list of strings, each either 'header:value' (set the header)
          or 'header' (remove the header). A 'header:' argument with an
          empty value is treated as a removal.

    Returns:
      (metadata_minus, metadata_plus): a set of lowercased header names to
      remove and a dict mapping lowercased header names to values to set.

    Raises:
      CommandException: on a non-ASCII, duplicated or disallowed header.
    """
    plus_fields = []
    minus_fields = []
    for md_arg in headers:
      # Split on the first colon only: values may themselves contain colons
      # (e.g. URLs or times).
      (header, _, value) = md_arg.partition(':')
      header = self._NormalizeHeader(header)
      if value:
        plus_fields.append((header, self._DecodeCustomValue(header, value)))
      else:
        minus_fields.append(header)
    return self._BuildMetadata(plus_fields, minus_fields)

  def _ParseMetadataSpec(self, spec):
    """Parses the deprecated CSV-formatted metadata spec.

    Args:
      spec: a single CSV row whose fields are either 'header:value' (set the
          header) or '-header' (remove the header).

    Returns:
      (metadata_minus, metadata_plus): a set of lowercased header names to
      remove and a dict mapping lowercased header names to values to set.

    Raises:
      CommandException: if the spec or one of its components is empty or
          malformed, or on a non-ASCII, duplicated or disallowed header.
    """
    self.THREADED_LOGGER.info('WARNING: metadata spec syntax (%s)\nis '
                              'deprecated and will eventually be removed.\n'
                              'Please see "gsutil help setmeta" for current '
                              'syntax' % spec)
    mdf = StringIO.StringIO(spec)
    try:
      try:
        md_args = csv.reader(mdf).next()
      except StopIteration:
        # The reader yields no row at all for an empty spec.
        raise CommandException('Invalid empty metadata specification.')
    finally:
      mdf.close()

    plus_fields = []
    minus_fields = []
    for md_arg in md_args:
      if not md_arg:
        raise CommandException(
            'Invalid empty metadata specification component.')
      if md_arg[0] == '-':
        header = md_arg[1:]
        if header.find(':') != -1:
          raise CommandException('Removal spec may not contain ":" (%s).' %
                                 header)
        minus_fields.append(self._NormalizeHeader(header))
      else:
        parts = md_arg.split(':', 1)
        if len(parts) != 2:
          raise CommandException(
              'Fields being added must include values (%s).' % md_arg)
        (header, value) = parts
        header = self._NormalizeHeader(header)
        plus_fields.append((header, self._DecodeCustomValue(header, value)))
    return self._BuildMetadata(plus_fields, minus_fields)

  def _NormalizeHeader(self, header):
    """Checks that header is ASCII and returns it lowercased."""
    _InsistAsciiHeader(header)
    # Translate headers to lowercase to match the casing assumed by our
    # sanity-checking operations.
    return header.lower()

  def _DecodeCustomValue(self, header, value):
    """Returns value decoded from UTF-8 if header is a custom metadata field."""
    # Allow non-ASCII data for custom metadata fields. Don't unicode
    # encode other fields because that would perturb their content
    # (e.g., adding %2F's into the middle of a Cache-Control value).
    if _IsCustomMeta(header):
      return unicode(value, 'utf-8')
    return value

  def _BuildMetadata(self, plus_fields, minus_fields):
    """Validates parsed fields and builds the set_metadata() arguments.

    Args:
      plus_fields: list of (header, value) pairs to set, headers lowercased.
      minus_fields: list of lowercased header names to remove.

    Returns:
      (metadata_minus, metadata_plus): set of headers to remove and dict of
      headers to set.

    Raises:
      CommandException: if a header appears more than once (including once
          to set and once to remove), or is neither one of
          Key.base_user_settable_fields nor a custom metadata field.
    """
    metadata_plus = dict(plus_fields)
    metadata_minus = set(minus_fields)
    # A size mismatch means some header was collapsed, i.e. given twice.
    if (len(metadata_plus) != len(plus_fields)
        or len(metadata_minus) != len(minus_fields)
        or metadata_minus.intersection(metadata_plus)):
      raise CommandException('Each header must appear at most once.')
    for f in sorted(metadata_minus.union(metadata_plus)):
      # This check is overly simple; it would be stronger to check, for each
      # URI argument, whether f.startswith the
      # uri.get_provider().metadata_prefix, but here we just parse the spec
      # once, before processing any of the URIs. This means we will not
      # detect if the user tries to set an x-goog-meta- field on another
      # provider's object, for example.
      if f not in Key.base_user_settable_fields and not _IsCustomMeta(f):
        raise CommandException('Invalid or disallowed header (%s).\n'
                               'Only these fields (plus x-goog-meta-* fields)'
                               ' can be set or unset:\n%s' % (f,
                               sorted(list(Key.base_user_settable_fields))))
    return (metadata_minus, metadata_plus)
413 | |
414 | |
415 def _InsistAsciiHeader(header): | |
416 if not all(ord(c) < 128 for c in header): | |
417 raise CommandException('Invalid non-ASCII header (%s).' % header) | |
418 | |
419 def _IsCustomMeta(header): | |
420 return header.startswith('x-goog-meta-') or header.startswith('x-amz-meta-') | |
OLD | NEW |