OLD | NEW |
| (Empty) |
1 # -*- coding: utf-8 -*- | |
2 # Copyright 2012 Google Inc. All Rights Reserved. | |
3 # | |
4 # Licensed under the Apache License, Version 2.0 (the "License"); | |
5 # you may not use this file except in compliance with the License. | |
6 # You may obtain a copy of the License at | |
7 # | |
8 # http://www.apache.org/licenses/LICENSE-2.0 | |
9 # | |
10 # Unless required by applicable law or agreed to in writing, software | |
11 # distributed under the License is distributed on an "AS IS" BASIS, | |
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 # See the License for the specific language governing permissions and | |
14 # limitations under the License. | |
15 """Implementation of setmeta command for setting cloud object metadata.""" | |
16 | |
17 from __future__ import absolute_import | |
18 | |
19 from gslib.cloud_api import AccessDeniedException | |
20 from gslib.cloud_api import PreconditionException | |
21 from gslib.cloud_api import Preconditions | |
22 from gslib.command import Command | |
23 from gslib.command_argument import CommandArgument | |
24 from gslib.cs_api_map import ApiSelector | |
25 from gslib.exception import CommandException | |
26 from gslib.name_expansion import NameExpansionIterator | |
27 from gslib.storage_url import StorageUrlFromString | |
28 from gslib.translation_helper import CopyObjectMetadata | |
29 from gslib.translation_helper import ObjectMetadataFromHeaders | |
30 from gslib.translation_helper import PreconditionsFromHeaders | |
31 from gslib.util import GetCloudApiInstance | |
32 from gslib.util import NO_MAX | |
33 from gslib.util import Retry | |
34 | |
35 | |
# One-line usage synopsis for the command. It is passed as usage_synopsis in
# the command spec and is also spliced into _DETAILED_HELP_TEXT.
36 _SYNOPSIS = """ | |
37 gsutil setmeta -h [header:value|header] ... url... | |
38 """ | |
39 | |
# Full help text for the command (synopsis, description with examples, and
# options). It is passed as help_text in the command's help spec.
# NOTE(review): the precondition example near the end of the description
# (gsutil -h "x-goog-if-metageneration-match:2" setmeta ...) spans two lines
# without a trailing backslash and names no target URL - confirm whether that
# is intentional.
40 _DETAILED_HELP_TEXT = (""" | |
41 <B>SYNOPSIS</B> | |
42 """ + _SYNOPSIS + """ | |
43 | |
44 | |
45 <B>DESCRIPTION</B> | |
46 The gsutil setmeta command allows you to set or remove the metadata on one | |
47 or more objects. It takes one or more header arguments followed by one or | |
48 more URLs, where each header argument is in one of two forms: | |
49 | |
50 - if you specify header:value, it will set the given header on all | |
51 named objects. | |
52 | |
53 - if you specify header (with no value), it will remove the given header | |
54 from all named objects. | |
55 | |
56 For example, the following command would set the Content-Type and | |
57 Cache-Control and remove the Content-Disposition on the specified objects: | |
58 | |
59 gsutil setmeta -h "Content-Type:text/html" \\ | |
60 -h "Cache-Control:public, max-age=3600" \\ | |
61 -h "Content-Disposition" gs://bucket/*.html | |
62 | |
63 If you have a large number of objects to update you might want to use the | |
64 gsutil -m option, to perform a parallel (multi-threaded/multi-processing) | |
65 update: | |
66 | |
67 gsutil -m setmeta -h "Content-Type:text/html" \\ | |
68 -h "Cache-Control:public, max-age=3600" \\ | |
69 -h "Content-Disposition" gs://bucket/*.html | |
70 | |
71 You can also use the setmeta command to set custom metadata on an object: | |
72 | |
73 gsutil setmeta -h "x-goog-meta-icecreamflavor:vanilla" gs://bucket/object | |
74 | |
75 See "gsutil help metadata" for details about how you can set metadata | |
76 while uploading objects, what metadata fields can be set and the meaning of | |
77 these fields, use of custom metadata, and how to view currently set metadata. | |
78 | |
79 NOTE: By default, publicly readable objects are served with a Cache-Control | |
80 header allowing such objects to be cached for 3600 seconds. For more details | |
81 about this default behavior see the CACHE-CONTROL section of | |
82 "gsutil help metadata". If you need to ensure that updates become visible | |
83 immediately, you should set a Cache-Control header of "Cache-Control:private, | |
84 max-age=0, no-transform" on such objects. You can do this with the command: | |
85 | |
86 gsutil setmeta -h "Content-Type:text/html" \\ | |
87 -h "Cache-Control:private, max-age=0, no-transform" gs://bucket/*.html | |
88 | |
89 The setmeta command reads each object's current generation and metageneration | |
90 and uses those as preconditions unless they are otherwise specified by | |
91 top-level arguments. For example: | |
92 | |
93 gsutil -h "x-goog-if-metageneration-match:2" setmeta | |
94 -h "x-goog-meta-icecreamflavor:vanilla" | |
95 | |
96 will set the icecreamflavor:vanilla metadata if the current live object has a | |
97 metageneration of 2. | |
98 | |
99 <B>OPTIONS</B> | |
100 -h Specifies a header:value to be added, or header to be removed, | |
101 from each named object. | |
102 """) | |
103 | |
# setmeta works with a header-like model of metadata, which does not line up
# with how the JSON API represents objects. The standard (non-custom) headers
# accepted here are the ones gsutil3 supported at the time gsutil4 was
# released.
SETTABLE_FIELDS = [
    'cache-control',
    'content-disposition',
    'content-encoding',
    'content-language',
    'content-md5',
    'content-type',
]
110 | |
111 | |
def _SetMetadataExceptionHandler(cls, e):
  """Logs a per-object failure and marks the overall run as unsuccessful.

  Args:
    cls: Command instance; its logger receives the error and its
        everything_set_okay flag is cleared.
    e: The exception raised while setting metadata.
  """
  error_logger = cls.logger
  error_logger.error(e)
  # RunCommand inspects this flag after Apply returns to decide whether to
  # raise a CommandException.
  cls.everything_set_okay = False
116 | |
117 | |
def _SetMetadataFuncWrapper(cls, name_expansion_result, thread_state=None):
  """Module-level shim that forwards one work item to cls.SetMetadataFunc.

  Args:
    cls: Command instance whose SetMetadataFunc does the work.
    name_expansion_result: Work item passed through unchanged.
    thread_state: Passed through unchanged as the thread_state keyword.
  """
  set_metadata = cls.SetMetadataFunc
  set_metadata(name_expansion_result, thread_state=thread_state)
120 | |
121 | |
class SetMetaCommand(Command):
  """Implementation of gsutil setmeta command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'setmeta',
      command_name_aliases=['setheader'],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=NO_MAX,
      supported_sub_args='h:rR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='setmeta',
      help_name_aliases=['setheader'],
      help_type='command_help',
      help_one_line_summary='Set metadata on already uploaded objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the setmeta command.

    Returns:
      0 on success.

    Raises:
      CommandException: If a canned-ACL header is supplied, the header
          arguments are invalid, a single non-recursive URL does not name an
          object, or metadata could not be set on some objects.
    """
    headers = []
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-h':
          if 'x-goog-acl' in a or 'x-amz-acl' in a:
            raise CommandException(
                'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
                'set ... to set canned ACLs.')
          headers.append(a)

    (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)

    # Headers to remove are recorded in the change dict with an empty-string
    # value, alongside the headers to set.
    self.metadata_change = metadata_plus
    for header in metadata_minus:
      self.metadata_change[header] = ''

    if len(self.args) == 1 and not self.recursion_requested:
      url = StorageUrlFromString(self.args[0])
      if not (url.IsCloudUrl() and url.IsObject()):
        raise CommandException('URL (%s) must name an object' % self.args[0])

    # Used to track if any objects' metadata failed to be set.
    self.everything_set_okay = True

    self.preconditions = PreconditionsFromHeaders(self.headers)

    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api,
        self.args, self.recursion_requested, all_versions=self.all_versions,
        continue_on_error=self.parallel_operations)

    try:
      # Perform requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_SetMetadataFuncWrapper, name_expansion_iterator,
                 _SetMetadataExceptionHandler, fail_on_error=True)
    except AccessDeniedException as e:
      if e.status == 403:
        self._WarnServiceAccounts()
      raise

    if not self.everything_set_okay:
      raise CommandException('Metadata for some objects could not be set.')

    return 0

  @Retry(PreconditionException, tries=3, timeout_secs=1)
  def SetMetadataFunc(self, name_expansion_result, thread_state=None):
    """Sets metadata on an object.

    Reads the object's current generation and metageneration, uses them as
    preconditions wherever the user did not supply one, and then patches the
    object with self.metadata_change.

    Args:
      name_expansion_result: NameExpansionResult describing target object.
      thread_state: gsutil Cloud API instance to use for the operation.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    exp_src_url = name_expansion_result.expanded_storage_url
    self.logger.info('Setting metadata on %s...', exp_src_url)

    fields = ['generation', 'metadata', 'metageneration']
    cloud_obj_metadata = gsutil_api.GetObjectMetadata(
        exp_src_url.bucket_name, exp_src_url.object_name,
        generation=exp_src_url.generation, provider=exp_src_url.scheme,
        fields=fields)

    # Build a per-object copy so the user-supplied values held in
    # self.preconditions are never overwritten by one object's generation.
    preconditions = Preconditions(
        gen_match=self.preconditions.gen_match,
        meta_gen_match=self.preconditions.meta_gen_match)
    if preconditions.gen_match is None:
      preconditions.gen_match = cloud_obj_metadata.generation
    if preconditions.meta_gen_match is None:
      preconditions.meta_gen_match = cloud_obj_metadata.metageneration

    # Patch handles the patch semantics for most metadata, but we need to
    # merge the custom metadata field manually.
    patch_obj_metadata = ObjectMetadataFromHeaders(self.metadata_change)

    api = gsutil_api.GetApiSelector(provider=exp_src_url.scheme)
    # For XML we only want to patch through custom metadata that has
    # changed, so the patch object is sent as built. For JSON we need to
    # build the complete set.
    if api == ApiSelector.JSON:
      CopyObjectMetadata(patch_obj_metadata, cloud_obj_metadata,
                         override=True)
      patch_obj_metadata = cloud_obj_metadata
      # Patch body does not need the object generation and metageneration.
      patch_obj_metadata.generation = None
      patch_obj_metadata.metageneration = None

    gsutil_api.PatchObjectMetadata(
        exp_src_url.bucket_name, exp_src_url.object_name, patch_obj_metadata,
        generation=exp_src_url.generation, preconditions=preconditions,
        provider=exp_src_url.scheme)

  def _ParseMetadataHeaders(self, headers):
    """Validates and parses metadata changes from the headers argument.

    Each entry is either 'header:value' (set the header) or 'header' (remove
    the header). Only the first ':' separates the name from the value, so a
    value may itself contain colons (for example a URL or a timestamp).

    Args:
      headers: List of raw -h argument strings to validate and parse.

    Returns:
      (metadata_minus, metadata_plus): Set of lowercased header names to
      remove, and dict mapping lowercased header names to the values to set.
      Both include the custom (x-goog-meta-* / x-amz-meta-*) entries.

    Raises:
      CommandException: If a header name is not ASCII, the value of a
          non-custom header is not ASCII, a header is specified more than
          once, or a non-custom header is not in SETTABLE_FIELDS.
    """
    metadata_minus = set()
    cust_metadata_minus = set()
    metadata_plus = {}
    cust_metadata_plus = {}
    # Build a count of the keys encountered from each plus and minus arg so we
    # can check for dupe field specs.
    num_metadata_plus_elems = 0
    num_cust_metadata_plus_elems = 0
    num_metadata_minus_elems = 0
    num_cust_metadata_minus_elems = 0

    for md_arg in headers:
      # Use partition rather than split: every character after the first ':'
      # belongs to the value, so values containing ':' are accepted.
      (header, _, value) = md_arg.partition(':')
      _InsistAsciiHeader(header)
      # Translate headers to lowercase to match the casing assumed by our
      # sanity-checking operations.
      header = header.lower()
      if value:
        if _IsCustomMeta(header):
          # Allow non-ASCII data for custom metadata fields.
          cust_metadata_plus[header] = value
          num_cust_metadata_plus_elems += 1
        else:
          # Don't unicode encode other fields because that would perturb their
          # content (e.g., adding %2F's into the middle of a Cache-Control
          # value).
          _InsistAsciiHeaderValue(header, value)
          value = str(value)
          metadata_plus[header] = value
          num_metadata_plus_elems += 1
      else:
        # No value (bare 'header' or 'header:') means remove the header.
        if _IsCustomMeta(header):
          cust_metadata_minus.add(header)
          num_cust_metadata_minus_elems += 1
        else:
          metadata_minus.add(header)
          num_metadata_minus_elems += 1

    # A count that differs from its container's size means a key repeated
    # within that container. The intersections catch a header given both as a
    # set and as a remove, for standard and custom headers alike.
    if (num_metadata_plus_elems != len(metadata_plus)
        or num_cust_metadata_plus_elems != len(cust_metadata_plus)
        or num_metadata_minus_elems != len(metadata_minus)
        or num_cust_metadata_minus_elems != len(cust_metadata_minus)
        or metadata_minus.intersection(metadata_plus)
        or cust_metadata_minus.intersection(cust_metadata_plus)):
      raise CommandException('Each header must appear at most once.')

    other_than_base_fields = set(metadata_plus).difference(SETTABLE_FIELDS)
    other_than_base_fields.update(
        metadata_minus.difference(SETTABLE_FIELDS))
    # Sorted so the header reported in the error is deterministic.
    for f in sorted(other_than_base_fields):
      # This check is overly simple; it would be stronger to check, for each
      # URL argument, whether f.startswith the
      # provider metadata_prefix, but here we just parse the spec
      # once, before processing any of the URLs. This means we will not
      # detect if the user tries to set an x-goog-meta- field on an another
      # provider's object, for example.
      if not _IsCustomMeta(f):
        raise CommandException(
            'Invalid or disallowed header (%s).\nOnly these fields (plus '
            'x-goog-meta-* fields) can be set or unset:\n%s' % (
                f, sorted(SETTABLE_FIELDS)))
    metadata_plus.update(cust_metadata_plus)
    metadata_minus.update(cust_metadata_minus)
    return (metadata_minus, metadata_plus)
330 | |
331 | |
def _InsistAscii(string, message):
  """Raises CommandException(message) if string has a non-ASCII character.

  Args:
    string: Text to check; each character's code point must be below 128.
    message: Error text for the raised CommandException.

  Raises:
    CommandException: If any character has a code point of 128 or above.
  """
  for char in string:
    if ord(char) >= 128:
      raise CommandException(message)
335 | |
336 | |
def _InsistAsciiHeader(header):
  """Checks via _InsistAscii that a header name contains only ASCII.

  Args:
    header: Header name to check.
  """
  error_text = 'Invalid non-ASCII header (%s).' % header
  _InsistAscii(header, error_text)
339 | |
340 | |
def _InsistAsciiHeaderValue(header, value):
  """Checks via _InsistAscii that a header's value contains only ASCII.

  Args:
    header: Header name, used only in the error text.
    value: Header value to check.
  """
  error_text = (
      'Invalid non-ASCII value (%s) was provided for header %s.'
      % (value, header))
  _InsistAscii(value, error_text)
345 | |
346 | |
def _IsCustomMeta(header):
  """Returns True if header starts with a custom-metadata prefix.

  Args:
    header: Header name to test. The comparison is case-sensitive.

  Returns:
    True if header begins with 'x-goog-meta-' or 'x-amz-meta-', else False.
  """
  custom_prefixes = ('x-goog-meta-', 'x-amz-meta-')
  return header.startswith(custom_prefixes)
OLD | NEW |