OLD | NEW |
| (Empty) |
1 # -*- coding: utf-8 -*- | |
2 # Copyright 2011 Google Inc. All Rights Reserved. | |
3 # | |
4 # Licensed under the Apache License, Version 2.0 (the "License"); | |
5 # you may not use this file except in compliance with the License. | |
6 # You may obtain a copy of the License at | |
7 # | |
8 # http://www.apache.org/licenses/LICENSE-2.0 | |
9 # | |
10 # Unless required by applicable law or agreed to in writing, software | |
11 # distributed under the License is distributed on an "AS IS" BASIS, | |
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 # See the License for the specific language governing permissions and | |
14 # limitations under the License. | |
15 """Implementation of Unix-like rm command for cloud storage providers.""" | |
16 | |
17 from __future__ import absolute_import | |
18 | |
19 from gslib.cloud_api import NotEmptyException | |
20 from gslib.cloud_api import ServiceException | |
21 from gslib.command import Command | |
22 from gslib.command import GetFailureCount | |
23 from gslib.command import ResetFailureCount | |
24 from gslib.command_argument import CommandArgument | |
25 from gslib.cs_api_map import ApiSelector | |
26 from gslib.exception import CommandException | |
27 from gslib.name_expansion import NameExpansionIterator | |
28 from gslib.storage_url import StorageUrlFromString | |
29 from gslib.translation_helper import PreconditionsFromHeaders | |
30 from gslib.util import GetCloudApiInstance | |
31 from gslib.util import NO_MAX | |
32 from gslib.util import Retry | |
33 from gslib.util import StdinIterator | |
34 | |
35 | |
# Usage synopsis for the rm command; reused as usage_synopsis in the command
# spec and embedded under the SYNOPSIS heading of _DETAILED_HELP_TEXT.
36 _SYNOPSIS = """ | |
37 gsutil rm [-f] [-r] url... | |
38 gsutil rm [-f] [-r] -I | |
39 """ | |
40 | |
# Long-form help for the rm command, passed to Command.HelpSpec as help_text.
# Built by splicing _SYNOPSIS between the SYNOPSIS heading and the remaining
# sections (DESCRIPTION, DATA RESTORATION..., OPTIONS).
41 _DETAILED_HELP_TEXT = (""" | |
42 <B>SYNOPSIS</B> | |
43 """ + _SYNOPSIS + """ | |
44 | |
45 | |
46 <B>DESCRIPTION</B> | |
47 The gsutil rm command removes objects. | |
48 For example, the command: | |
49 | |
50 gsutil rm gs://bucket/subdir/* | |
51 | |
52 will remove all objects in gs://bucket/subdir, but not in any of its | |
53 sub-directories. In contrast: | |
54 | |
55 gsutil rm gs://bucket/subdir/** | |
56 | |
57 will remove all objects under gs://bucket/subdir or any of its | |
58 subdirectories. | |
59 | |
60 You can also use the -r option to specify recursive object deletion. Thus, for | |
61 example, either of the following two commands will remove gs://bucket/subdir | |
62 and all objects and subdirectories under it: | |
63 | |
64 gsutil rm gs://bucket/subdir** | |
65 gsutil rm -r gs://bucket/subdir | |
66 | |
67 The -r option will also delete all object versions in the subdirectory for | |
68 versioning-enabled buckets, whereas the ** command will only delete the live | |
69 version of each object in the subdirectory. | |
70 | |
71 Running gsutil rm -r on a bucket will delete all versions of all objects in | |
72 the bucket, and then delete the bucket: | |
73 | |
74 gsutil rm -r gs://bucket | |
75 | |
76 If you want to delete all objects in the bucket, but not the bucket itself, | |
77 this command will work: | |
78 | |
79 gsutil rm gs://bucket/** | |
80 | |
81 If you have a large number of objects to remove you might want to use the | |
82 gsutil -m option, to perform a parallel (multi-threaded/multi-processing) | |
83 removes: | |
84 | |
85 gsutil -m rm -r gs://my_bucket/subdir | |
86 | |
87 You can pass a list of URLs (one per line) to remove on stdin instead of as | |
88 command line arguments by using the -I option. This allows you to use gsutil | |
89 in a pipeline to remove objects identified by a program, such as: | |
90 | |
91 some_program | gsutil -m rm -I | |
92 | |
93 The contents of stdin can name cloud URLs and wildcards of cloud URLs. | |
94 | |
95 Note that gsutil rm will refuse to remove files from the local | |
96 file system. For example this will fail: | |
97 | |
98 gsutil rm *.txt | |
99 | |
100 WARNING: Object removal cannot be undone. Google Cloud Storage is designed | |
101 to give developers a high amount of flexibility and control over their data, | |
102 and Google maintains strict controls over the processing and purging of | |
103 deleted data. To protect yourself from mistakes, you can configure object | |
104 versioning on your bucket(s). See 'gsutil help versions' for details. | |
105 | |
106 | |
107 <B>DATA RESTORATION FROM ACCIDENTAL DELETION OR OVERWRITES</B> | |
108 Google Cloud Storage does not provide support for restoring data lost | |
109 or overwritten due to customer errors. If you have concerns that your | |
110 application software (or your users) may at some point erroneously delete or | |
111 overwrite data, you can protect yourself from that risk by enabling Object | |
112 Versioning (see "gsutil help versioning"). Doing so increases storage costs, | |
113 which can be partially mitigated by configuring Lifecycle Management to delete | |
114 older object versions (see "gsutil help lifecycle"). | |
115 | |
116 | |
117 <B>OPTIONS</B> | |
118 -f Continues silently (without printing error messages) despite | |
119 errors when removing multiple objects. If some of the objects | |
120 could not be removed, gsutil's exit status will be non-zero even | |
121 if this flag is set. This option is implicitly set when running | |
122 "gsutil -m rm ...". | |
123 | |
124 -I Causes gsutil to read the list of objects to remove from stdin. | |
125 This allows you to run a program that generates the list of | |
126 objects to remove. | |
127 | |
128 -R, -r Causes bucket or bucket subdirectory contents (all objects and | |
129 subdirectories that it contains) to be removed recursively. If | |
130 used with a bucket-only URL (like gs://bucket), after deleting | |
131 objects and subdirectories gsutil will delete the bucket. The -r | |
132 flag implies the -a flag and will delete all object versions. | |
133 | |
134 -a Delete all versions of an object. | |
135 """) | |
136 | |
137 | |
def _RemoveExceptionHandler(cls, e):
  """Records a failed removal so the command can report it after Apply.

  Args:
    cls: Command instance. Its continue_on_error flag decides whether the
        error is logged, and its everything_removed_okay flag is cleared.
    e: The exception raised while removing an object.
  """
  quiet = cls.continue_on_error
  if not quiet:
    message = str(e)
    cls.logger.error(message)
  # Cleared regardless of logging; RunCommand inspects this flag afterwards.
  cls.everything_removed_okay = False
143 | |
144 | |
# pylint: disable=unused-argument
def _RemoveFoldersExceptionHandler(cls, e):
  """Exception handler for the folder-placeholder cleanup pass.

  When removing folder placeholder objects we don't mind if none exist, so a
  CommandException reporting 'No URLs matched' is swallowed. Anything else is
  re-raised to the caller.

  Args:
    cls: Command instance (unused).
    e: The exception raised while removing a folder placeholder object.

  Raises:
    The exception passed in, unless it is the benign "no match" case.
  """
  # Test against the exception class itself. CommandException.__class__ is the
  # metaclass, which no exception instance is an instance of, so the benign
  # branch could never be taken. str(e) is used instead of e.message so the
  # match does not depend on how the exception stores its text.
  if isinstance(e, CommandException) and 'No URLs matched' in str(e):
    return
  raise e
153 | |
154 | |
def _RemoveFuncWrapper(cls, name_expansion_result, thread_state=None):
  """Module-level trampoline handed to Apply; forwards to cls.RemoveFunc.

  Args:
    cls: Command instance whose RemoveFunc performs the deletion.
    name_expansion_result: Expansion result identifying the object to remove.
    thread_state: Forwarded unchanged to RemoveFunc.
  """
  remove = cls.RemoveFunc
  remove(name_expansion_result, thread_state=thread_state)
157 | |
158 | |
class RmCommand(Command):
  """Implementation of gsutil rm command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'rm',
      command_name_aliases=['del', 'delete', 'remove'],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='afIrR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='rm',
      help_name_aliases=['del', 'delete', 'remove'],
      help_type='command_help',
      help_one_line_summary='Remove objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the rm command.

    Parses the -a/-f/-I/-r/-R sub-options, removes every object matched by the
    URL arguments (or by the URLs read from stdin with -I), then, for recursive
    removals, removes `_$folder$` placeholder objects and finally any buckets
    named on the command line.

    Returns:
      0 on completion.

    Raises:
      CommandException: on invalid argument combinations, or when removal
          fails and -f was not given.
    """
    # self.recursion_requested is initialized in command.py (so it can be
    # checked in parent class for all commands).
    self.continue_on_error = False
    self.read_args_from_stdin = False
    self.all_versions = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o in ('-r', '-R'):
          # Recursive removal implies removing all object versions.
          self.recursion_requested = True
          self.all_versions = True

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      url_strs = StdinIterator()
      if self.recursion_requested:
        # With -r the URL list is walked more than once below (bucket
        # detection, name expansion, folder-placeholder cleanup). Stdin can
        # only be consumed once, so buffer it up front; without -r the URLs
        # keep streaming lazily.
        # NOTE(review): assumes StdinIterator is a one-shot iterator -- confirm.
        url_strs = list(url_strs)
    else:
      if not self.args:
        raise CommandException('The rm command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args

    # Buckets named directly (or via provider/bucket wildcards) are deleted
    # last, after their contents; remember both the expanded URLs and the
    # original strings that produced them.
    bucket_urls_to_delete = []
    bucket_strings_to_delete = []
    if self.recursion_requested:
      bucket_fields = ['id']
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsBucket() or url.IsProvider():
          for blr in self.WildcardIterator(url_str).IterBuckets(
              bucket_fields=bucket_fields):
            bucket_urls_to_delete.append(blr.storage_url)
            bucket_strings_to_delete.append(url_str)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    # Used to track if any files failed to be removed.
    self.everything_removed_okay = True

    try:
      # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
      name_expansion_iterator = NameExpansionIterator(
          self.command_name, self.debug, self.logger, self.gsutil_api,
          url_strs, self.recursion_requested, project_id=self.project_id,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or self.parallel_operations)

      # Perform remove requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                 _RemoveExceptionHandler,
                 fail_on_error=(not self.continue_on_error))

    # Assuming the bucket has versioning enabled, url's that don't map to
    # objects should throw an error even with all_versions, since the prior
    # round of deletes only sends objects to a history table.
    # This assumption that rm -a is only called for versioned buckets should be
    # corrected, but the fix is non-trivial.
    except CommandException as e:
      # Don't raise if there are buckets to delete -- it's valid to say:
      #   gsutil rm -r gs://some_bucket
      # if the bucket is empty.
      if not bucket_urls_to_delete and not self.continue_on_error:
        raise
      # Reset the failure count if we failed due to an empty bucket that we're
      # going to delete.
      msg = 'No URLs matched: '
      if msg in str(e):
        parts = str(e).split(msg)
        if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
          ResetFailureCount()
    except ServiceException:
      # Service errors are only tolerated when -f was given.
      if not self.continue_on_error:
        raise

    if not self.everything_removed_okay and not self.continue_on_error:
      raise CommandException('Some files could not be removed.')

    # If this was a gsutil rm -r command covering any bucket subdirs,
    # remove any dir_$folder$ objects (which are created by various web UI
    # tools to simulate folders).
    if self.recursion_requested:
      had_previous_failures = GetFailureCount() > 0
      folder_object_wildcards = []
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsObject():
          folder_object_wildcards.append('%s**_$folder$' % url_str)
      if folder_object_wildcards:
        self.continue_on_error = True
        try:
          name_expansion_iterator = NameExpansionIterator(
              self.command_name, self.debug,
              self.logger, self.gsutil_api,
              folder_object_wildcards, self.recursion_requested,
              project_id=self.project_id,
              all_versions=self.all_versions)
          # When we're removing folder objects, always continue on error
          self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                     _RemoveFoldersExceptionHandler,
                     fail_on_error=False)
        except CommandException as e:
          # Ignore exception from name expansion due to an absent folder file.
          if not e.reason.startswith('No URLs matched:'):
            raise
        # Failures recorded only by the folder cleanup pass should not affect
        # the overall result; keep the count if earlier removals had failed.
        if not had_previous_failures:
          ResetFailureCount()

    # Now that all data has been deleted, delete any bucket URLs.
    for url in bucket_urls_to_delete:
      self.logger.info('Removing %s...', url)

      # Retried on NotEmptyException (up to 3 tries); defined and invoked
      # within the same iteration, so it always sees the current url.
      @Retry(NotEmptyException, tries=3, timeout_secs=1)
      def BucketDeleteWithRetry():
        self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)

      BucketDeleteWithRetry()

    return 0

  def RemoveFunc(self, name_expansion_result, thread_state=None):
    """Deletes the single object named by one name-expansion result.

    Args:
      name_expansion_result: Result whose expanded_storage_url identifies the
          object (bucket, name, generation, scheme) to delete.
      thread_state: Passed to GetCloudApiInstance to obtain the API instance
          used for the delete call.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    exp_src_url = name_expansion_result.expanded_storage_url
    self.logger.info('Removing %s...', exp_src_url)
    gsutil_api.DeleteObject(
        exp_src_url.bucket_name, exp_src_url.object_name,
        preconditions=self.preconditions, generation=exp_src_url.generation,
        provider=exp_src_url.scheme)
326 | |
OLD | NEW |