OLD | NEW |
(Empty) | |
| 1 # -*- coding: utf-8 -*- |
| 2 # Copyright 2011 Google Inc. All Rights Reserved. |
| 3 # |
| 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 # you may not use this file except in compliance with the License. |
| 6 # You may obtain a copy of the License at |
| 7 # |
| 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 # |
| 10 # Unless required by applicable law or agreed to in writing, software |
| 11 # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 # See the License for the specific language governing permissions and |
| 14 # limitations under the License. |
| 15 """Implementation of Unix-like rm command for cloud storage providers.""" |
| 16 |
| 17 from __future__ import absolute_import |
| 18 |
| 19 from gslib.cloud_api import NotEmptyException |
| 20 from gslib.cloud_api import ServiceException |
| 21 from gslib.command import Command |
| 22 from gslib.command import GetFailureCount |
| 23 from gslib.command import ResetFailureCount |
| 24 from gslib.command_argument import CommandArgument |
| 25 from gslib.cs_api_map import ApiSelector |
| 26 from gslib.exception import CommandException |
| 27 from gslib.name_expansion import NameExpansionIterator |
| 28 from gslib.storage_url import StorageUrlFromString |
| 29 from gslib.translation_helper import PreconditionsFromHeaders |
| 30 from gslib.util import GetCloudApiInstance |
| 31 from gslib.util import NO_MAX |
| 32 from gslib.util import Retry |
| 33 from gslib.util import StdinIterator |
| 34 |
| 35 |
| 36 _SYNOPSIS = """ |
| 37 gsutil rm [-f] [-r] url... |
| 38 gsutil rm [-f] [-r] -I |
| 39 """ |
| 40 |
| 41 _DETAILED_HELP_TEXT = (""" |
| 42 <B>SYNOPSIS</B> |
| 43 """ + _SYNOPSIS + """ |
| 44 |
| 45 |
| 46 <B>DESCRIPTION</B> |
| 47 The gsutil rm command removes objects. |
| 48 For example, the command: |
| 49 |
| 50 gsutil rm gs://bucket/subdir/* |
| 51 |
| 52 will remove all objects in gs://bucket/subdir, but not in any of its |
| 53 sub-directories. In contrast: |
| 54 |
| 55 gsutil rm gs://bucket/subdir/** |
| 56 |
| 57 will remove all objects under gs://bucket/subdir or any of its |
| 58 subdirectories. |
| 59 |
| 60 You can also use the -r option to specify recursive object deletion. Thus, for |
| 61 example, either of the following two commands will remove gs://bucket/subdir |
| 62 and all objects and subdirectories under it: |
| 63 |
| 64 gsutil rm gs://bucket/subdir** |
| 65 gsutil rm -r gs://bucket/subdir |
| 66 |
| 67 The -r option will also delete all object versions in the subdirectory for |
| 68 versioning-enabled buckets, whereas the ** command will only delete the live |
| 69 version of each object in the subdirectory. |
| 70 |
| 71 Running gsutil rm -r on a bucket will delete all versions of all objects in |
| 72 the bucket, and then delete the bucket: |
| 73 |
| 74 gsutil rm -r gs://bucket |
| 75 |
| 76 If you want to delete all objects in the bucket, but not the bucket itself, |
| 77 this command will work: |
| 78 |
| 79 gsutil rm gs://bucket/** |
| 80 |
| 81 If you have a large number of objects to remove you might want to use the |
| 82 gsutil -m option, to perform a parallel (multi-threaded/multi-processing) |
| 83 removes: |
| 84 |
| 85 gsutil -m rm -r gs://my_bucket/subdir |
| 86 |
| 87 You can pass a list of URLs (one per line) to remove on stdin instead of as |
| 88 command line arguments by using the -I option. This allows you to use gsutil |
| 89 in a pipeline to remove objects identified by a program, such as: |
| 90 |
| 91 some_program | gsutil -m rm -I |
| 92 |
| 93 The contents of stdin can name cloud URLs and wildcards of cloud URLs. |
| 94 |
| 95 Note that gsutil rm will refuse to remove files from the local |
| 96 file system. For example this will fail: |
| 97 |
| 98 gsutil rm *.txt |
| 99 |
| 100 WARNING: Object removal cannot be undone. Google Cloud Storage is designed |
| 101 to give developers a high amount of flexibility and control over their data, |
| 102 and Google maintains strict controls over the processing and purging of |
| 103 deleted data. To protect yourself from mistakes, you can configure object |
| 104 versioning on your bucket(s). See 'gsutil help versions' for details. |
| 105 |
| 106 |
| 107 <B>DATA RESTORATION FROM ACCIDENTAL DELETION OR OVERWRITES</B> |
| 108 Google Cloud Storage does not provide support for restoring data lost |
| 109 or overwritten due to customer errors. If you have concerns that your |
| 110 application software (or your users) may at some point erroneously delete or |
| 111 overwrite data, you can protect yourself from that risk by enabling Object |
| 112 Versioning (see "gsutil help versioning"). Doing so increases storage costs, |
| 113 which can be partially mitigated by configuring Lifecycle Management to delete |
| 114 older object versions (see "gsutil help lifecycle"). |
| 115 |
| 116 |
| 117 <B>OPTIONS</B> |
| 118 -f Continues silently (without printing error messages) despite |
| 119 errors when removing multiple objects. If some of the objects |
| 120 could not be removed, gsutil's exit status will be non-zero even |
| 121 if this flag is set. This option is implicitly set when running |
| 122 "gsutil -m rm ...". |
| 123 |
| 124 -I Causes gsutil to read the list of objects to remove from stdin. |
| 125 This allows you to run a program that generates the list of |
| 126 objects to remove. |
| 127 |
| 128 -R, -r Causes bucket or bucket subdirectory contents (all objects and |
| 129 subdirectories that it contains) to be removed recursively. If |
| 130 used with a bucket-only URL (like gs://bucket), after deleting |
| 131 objects and subdirectories gsutil will delete the bucket. The -r |
| 132 flag implies the -a flag and will delete all object versions. |
| 133 |
| 134 -a Delete all versions of an object. |
| 135 """) |
| 136 |
| 137 |
| 138 def _RemoveExceptionHandler(cls, e): |
| 139 """Simple exception handler to allow post-completion status.""" |
| 140 if not cls.continue_on_error: |
| 141 cls.logger.error(str(e)) |
| 142 cls.everything_removed_okay = False |
| 143 |
| 144 |
# pylint: disable=unused-argument
def _RemoveFoldersExceptionHandler(cls, e):
  """When removing folders, we don't mind if none exist.

  Args:
    cls: Command instance supplied by the caller; not used here.
    e: The exception raised while removing folder placeholder objects.

  Raises:
    The exception e itself, unless it is a CommandException whose reason
    reports that no URLs matched.
  """
  # Compare against the exception class itself. CommandException.__class__ is
  # the metaclass (type), which no exception instance is an instance of, so a
  # check against it can never succeed and every error would be re-raised.
  # The text is read from e.reason, the attribute RmCommand.RunCommand also
  # inspects for the same 'No URLs matched' condition.
  if isinstance(e, CommandException) and 'No URLs matched' in e.reason:
    return
  raise e
| 153 |
| 154 |
| 155 def _RemoveFuncWrapper(cls, name_expansion_result, thread_state=None): |
| 156 cls.RemoveFunc(name_expansion_result, thread_state=thread_state) |
| 157 |
| 158 |
class RmCommand(Command):
  """Implementation of gsutil rm command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'rm',
      command_name_aliases=['del', 'delete', 'remove'],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='afIrR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='rm',
      help_name_aliases=['del', 'delete', 'remove'],
      help_type='command_help',
      help_one_line_summary='Remove objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the rm command.

    Parses the sub-options, removes every object matched by the URL arguments
    (or by the URLs read from stdin with -I), then, for recursive removals,
    removes matching "_$folder$" placeholder objects and finally deletes any
    buckets that were named directly.

    Returns:
      0 once all requested removals have been attempted.

    Raises:
      CommandException: if the arguments are inconsistent with -I, or if a
          removal fails and -f was not given.
      ServiceException: re-raised from the removal phase when -f was not given.
    """
    # self.recursion_requested is initialized in command.py (so it can be
    # checked in parent class for all commands).
    self.continue_on_error = False
    self.read_args_from_stdin = False
    self.all_versions = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o == '-r' or o == '-R':
          # Recursive removal implies removing all object versions.
          self.recursion_requested = True
          self.all_versions = True

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      # NOTE(review): url_strs is iterated more than once below when -r is
      # given (bucket discovery, name expansion, folder cleanup). If
      # StdinIterator is a one-shot iterator, the later passes would see
      # nothing -- confirm against gslib.util.StdinIterator.
      url_strs = StdinIterator()
    else:
      if not self.args:
        raise CommandException('The rm command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args

    # Buckets named directly on a recursive removal are deleted at the end,
    # after their contents. The original URL strings are kept as well so that
    # a "No URLs matched" error for one of them can be recognized below.
    bucket_urls_to_delete = []
    bucket_strings_to_delete = []
    if self.recursion_requested:
      bucket_fields = ['id']
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsBucket() or url.IsProvider():
          for blr in self.WildcardIterator(url_str).IterBuckets(
              bucket_fields=bucket_fields):
            bucket_urls_to_delete.append(blr.storage_url)
            bucket_strings_to_delete.append(url_str)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    # Used to track if any files failed to be removed.
    self.everything_removed_okay = True

    try:
      # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
      name_expansion_iterator = NameExpansionIterator(
          self.command_name, self.debug, self.logger, self.gsutil_api,
          url_strs, self.recursion_requested, project_id=self.project_id,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or self.parallel_operations)

      # Perform remove requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                 _RemoveExceptionHandler,
                 fail_on_error=(not self.continue_on_error))

    # Assuming the bucket has versioning enabled, url's that don't map to
    # objects should throw an error even with all_versions, since the prior
    # round of deletes only sends objects to a history table.
    # This assumption that rm -a is only called for versioned buckets should be
    # corrected, but the fix is non-trivial.
    except CommandException as e:
      # Don't raise if there are buckets to delete -- it's valid to say:
      #   gsutil rm -r gs://some_bucket
      # if the bucket is empty.
      if not bucket_urls_to_delete and not self.continue_on_error:
        raise
      # Reset the failure count if we failed due to an empty bucket that we're
      # going to delete.
      msg = 'No URLs matched: '
      if msg in str(e):
        parts = str(e).split(msg)
        if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
          ResetFailureCount()
    except ServiceException:
      if not self.continue_on_error:
        raise

    if not self.everything_removed_okay and not self.continue_on_error:
      raise CommandException('Some files could not be removed.')

    # If this was a gsutil rm -r command covering any bucket subdirs,
    # remove any dir_$folder$ objects (which are created by various web UI
    # tools to simulate folders).
    if self.recursion_requested:
      # Remember whether failures pre-date the folder cleanup so that the
      # cleanup's own failures can be discarded afterwards without hiding
      # earlier ones.
      had_previous_failures = GetFailureCount() > 0
      folder_object_wildcards = []
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsObject():
          folder_object_wildcards.append('%s**_$folder$' % url_str)
      if folder_object_wildcards:
        self.continue_on_error = True
        try:
          name_expansion_iterator = NameExpansionIterator(
              self.command_name, self.debug,
              self.logger, self.gsutil_api,
              folder_object_wildcards, self.recursion_requested,
              project_id=self.project_id,
              all_versions=self.all_versions)
          # When we're removing folder objects, always continue on error
          self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                     _RemoveFoldersExceptionHandler,
                     fail_on_error=False)
        except CommandException as e:
          # Ignore exception from name expansion due to an absent folder file.
          if not e.reason.startswith('No URLs matched:'):
            raise
        if not had_previous_failures:
          ResetFailureCount()

    # Now that all data has been deleted, delete any bucket URLs.
    for url in bucket_urls_to_delete:
      self.logger.info('Removing %s...', url)

      # Retried on NotEmptyException (up to 3 tries); the helper is invoked
      # immediately, so it always sees the current loop value of url.
      @Retry(NotEmptyException, tries=3, timeout_secs=1)
      def BucketDeleteWithRetry():
        self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)

      BucketDeleteWithRetry()

    return 0

  def RemoveFunc(self, name_expansion_result, thread_state=None):
    """Removes the single object described by a name expansion result.

    Args:
      name_expansion_result: Result whose expanded_storage_url attribute
          identifies the object (bucket, name, generation, scheme) to delete.
      thread_state: Passed to GetCloudApiInstance to obtain the API instance
          used for the delete call.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    exp_src_url = name_expansion_result.expanded_storage_url
    self.logger.info('Removing %s...', exp_src_url)
    gsutil_api.DeleteObject(
        exp_src_url.bucket_name, exp_src_url.object_name,
        preconditions=self.preconditions, generation=exp_src_url.generation,
        provider=exp_src_url.scheme)
| 326 |
OLD | NEW |