| OLD | NEW |
| 1 # -*- coding: utf-8 -*- |
| 1 # Copyright 2011 Google Inc. All Rights Reserved. | 2 # Copyright 2011 Google Inc. All Rights Reserved. |
| 2 # | 3 # |
| 3 # Licensed under the Apache License, Version 2.0 (the "License"); | 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 # you may not use this file except in compliance with the License. | 5 # you may not use this file except in compliance with the License. |
| 5 # You may obtain a copy of the License at | 6 # You may obtain a copy of the License at |
| 6 # | 7 # |
| 7 # http://www.apache.org/licenses/LICENSE-2.0 | 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 # | 9 # |
| 9 # Unless required by applicable law or agreed to in writing, software | 10 # Unless required by applicable law or agreed to in writing, software |
| 10 # distributed under the License is distributed on an "AS IS" BASIS, | 11 # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 # See the License for the specific language governing permissions and | 13 # See the License for the specific language governing permissions and |
| 13 # limitations under the License. | 14 # limitations under the License. |
| 15 """Implementation of Unix-like rm command for cloud storage providers.""" |
| 14 | 16 |
| 15 import boto | 17 from __future__ import absolute_import |
| 16 import textwrap | |
| 17 | 18 |
| 18 from boto.exception import GSResponseError | 19 from gslib.cloud_api import NotEmptyException |
| 20 from gslib.cloud_api import ServiceException |
| 19 from gslib.command import Command | 21 from gslib.command import Command |
| 20 from gslib.command import COMMAND_NAME | 22 from gslib.command import GetFailureCount |
| 21 from gslib.command import COMMAND_NAME_ALIASES | 23 from gslib.command import ResetFailureCount |
| 22 from gslib.command import FILE_URIS_OK | 24 from gslib.cs_api_map import ApiSelector |
| 23 from gslib.command import MAX_ARGS | |
| 24 from gslib.command import MIN_ARGS | |
| 25 from gslib.command import PROVIDER_URIS_OK | |
| 26 from gslib.command import SUPPORTED_SUB_ARGS | |
| 27 from gslib.command import URIS_START_ARG | |
| 28 from gslib.exception import CommandException | 25 from gslib.exception import CommandException |
| 29 from gslib.help_provider import HELP_NAME | |
| 30 from gslib.help_provider import HELP_NAME_ALIASES | |
| 31 from gslib.help_provider import HELP_ONE_LINE_SUMMARY | |
| 32 from gslib.help_provider import HELP_TEXT | |
| 33 from gslib.help_provider import HelpType | |
| 34 from gslib.help_provider import HELP_TYPE | |
| 35 from gslib.name_expansion import NameExpansionIterator | 26 from gslib.name_expansion import NameExpansionIterator |
| 27 from gslib.storage_url import StorageUrlFromString |
| 28 from gslib.util import GetCloudApiInstance |
| 36 from gslib.util import NO_MAX | 29 from gslib.util import NO_MAX |
| 30 from gslib.util import Retry |
| 37 | 31 |
| 38 _detailed_help_text = (""" | 32 |
| 33 _DETAILED_HELP_TEXT = (""" |
| 39 <B>SYNOPSIS</B> | 34 <B>SYNOPSIS</B> |
| 40 gsutil rm [-f] [-R] uri... | 35 gsutil rm [-f] [-R] url... |
| 41 | 36 |
| 42 | 37 |
| 43 <B>DESCRIPTION</B> | 38 <B>DESCRIPTION</B> |
| 44 The gsutil rm command removes objects. | 39 The gsutil rm command removes objects. |
| 45 For example, the command: | 40 For example, the command: |
| 46 | 41 |
| 47 gsutil rm gs://bucket/subdir/* | 42 gsutil rm gs://bucket/subdir/* |
| 48 | 43 |
| 49 will remove all objects in gs://bucket/subdir, but not in any of its | 44 will remove all objects in gs://bucket/subdir, but not in any of its |
| 50 sub-directories. In contrast: | 45 sub-directories. In contrast: |
| 51 | 46 |
| 52 gsutil rm gs://bucket/subdir/** | 47 gsutil rm gs://bucket/subdir/** |
| 53 | 48 |
| 54 will remove all objects under gs://bucket/subdir or any of its | 49 will remove all objects under gs://bucket/subdir or any of its |
| 55 subdirectories. | 50 subdirectories. |
| 56 | 51 |
| 57 You can also use the -R option to specify recursive object deletion. Thus, for | 52 You can also use the -R option to specify recursive object deletion. Thus, for |
| 58 example, either of the following two commands will remove gs://bucket/subdir | 53 example, either of the following two commands will remove gs://bucket/subdir |
| 59 and all objects and subdirectories under it: | 54 and all objects and subdirectories under it: |
| 60 | 55 |
| 61 gsutil rm gs://bucket/subdir** | 56 gsutil rm gs://bucket/subdir** |
| 62 gsutil rm -R gs://bucket/subdir | 57 gsutil rm -R gs://bucket/subdir |
| 63 | 58 |
| 64 Running gsutil rm -R on a bucket will delete all objects in the bucket, and | 59 The -R option will also delete all object versions in the subdirectory for |
| 65 then delete the bucket: | 60 versioning-enabled buckets, whereas the ** wildcard will only delete the live |
| 61 version of each object in the subdirectory. |
| 62 |
| 63 Running gsutil rm -R on a bucket will delete all versions of all objects in |
| 64 the bucket, and then delete the bucket: |
| 66 | 65 |
| 67 gsutil rm -R gs://bucket | 66 gsutil rm -R gs://bucket |
| 68 | 67 |
| 69 If you want to delete all objects in the bucket, but not the bucket itself, | 68 If you want to delete all objects in the bucket, but not the bucket itself, |
| 70 this command will work: | 69 this command will work: |
| 71 | 70 |
| 72 gsutil rm gs://bucket/** | 71 gsutil rm gs://bucket/** |
| 73 | 72 |
| 74 If you have a large number of objects to remove you might want to use the | 73 If you have a large number of objects to remove you might want to use the |
| 75 gsutil -m option, to perform parallel (multi-threaded/multi-processing) | 74 gsutil -m option, to perform parallel (multi-threaded/multi-processing) |
| 76 removes: | 75 removes: |
| 77 | 76 |
| 78 gsutil -m rm -R gs://my_bucket/subdir | 77 gsutil -m rm -R gs://my_bucket/subdir |
| 79 | 78 |
| 80 Note that gsutil rm will refuse to remove files from the local | 79 Note that gsutil rm will refuse to remove files from the local |
| 81 file system. For example this will fail: | 80 file system. For example this will fail: |
| 82 | 81 |
| 83 gsutil rm *.txt | 82 gsutil rm *.txt |
| 84 | 83 |
| 85 WARNING: Object removal cannot be undone. Google Cloud Storage is designed | 84 WARNING: Object removal cannot be undone. Google Cloud Storage is designed |
| 86 to give developers a high amount of flexibility and control over their data, | 85 to give developers a high amount of flexibility and control over their data, |
| 87 and Google maintains strict controls over the processing and purging of | 86 and Google maintains strict controls over the processing and purging of |
| 88 deleted data. To protect yourself from mistakes, you can configure object | 87 deleted data. To protect yourself from mistakes, you can configure object |
| 89 versioning on your bucket(s). See 'gsutil help versions' for details. | 88 versioning on your bucket(s). See 'gsutil help versions' for details. |
| 90 | 89 |
| 91 | 90 |
| 92 <B>OPTIONS</B> | 91 <B>OPTIONS</B> |
| 93 -f Continues silently (without printing error messages) despite | 92 -f Continues silently (without printing error messages) despite |
| 94 errors when removing multiple objects. With this option the gsutil | 93 errors when removing multiple objects. If some of the objects |
| 95 exit status will be 0 even if some objects couldn't be removed. | 94 could not be removed, gsutil's exit status will be non-zero even |
| 95 if this flag is set. This option is implicitly set when running |
| 96 "gsutil -m rm ...". |
| 96 | 97 |
| 97 -R, -r Causes bucket contents to be removed recursively (i.e., including | 98 -R, -r Causes bucket or bucket subdirectory contents (all objects and |
| 98 all objects and subdirectories). If used with a bucket-only URI | 99 subdirectories that it contains) to be removed recursively. If |
| 99 (like gs://bucket), after deleting objects and subdirectories | 100 used with a bucket-only URL (like gs://bucket), after deleting |
| 100 gsutil will delete the bucket. | 101 objects and subdirectories gsutil will delete the bucket. The -r |
| 102 flag implies the -a flag and will delete all object versions. |
| 101 | 103 |
| 102 -a Delete all versions of an object. | 104 -a Delete all versions of an object. |
| 103 """) | 105 """) |
| 104 | 106 |
| 107 |
| 105 def _RemoveExceptionHandler(cls, e): | 108 def _RemoveExceptionHandler(cls, e): |
| 106 """Simple exception handler to allow post-completion status.""" | 109 """Simple exception handler to allow post-completion status.""" |
| 107 cls.logger.error(str(e)) | 110 if not cls.continue_on_error: |
| 111 cls.logger.error(str(e)) |
| 108 cls.everything_removed_okay = False | 112 cls.everything_removed_okay = False |
| 109 | 113 |
| 110 def _RemoveFuncWrapper(cls, name_expansion_result): | 114 |
| 111 cls._RemoveFunc(name_expansion_result) | 115 # pylint: disable=unused-argument |
| 116 def _RemoveFoldersExceptionHandler(cls, e): |
| 117 """When removing folders, we don't mind if none exist.""" |
| 118 if (isinstance(e, CommandException.__class__) and |
| 119 'No URLs matched' in e.message): |
| 120 pass |
| 121 else: |
| 122 raise e |
| 123 |
| 124 |
| 125 def _RemoveFuncWrapper(cls, name_expansion_result, thread_state=None): |
| 126 cls.RemoveFunc(name_expansion_result, thread_state=thread_state) |
| 112 | 127 |
| 113 | 128 |
| 114 class RmCommand(Command): | 129 class RmCommand(Command): |
| 115 """Implementation of gsutil rm command.""" | 130 """Implementation of gsutil rm command.""" |
| 116 | 131 |
| 117 # Command specification (processed by parent class). | 132 # Command specification. See base class for documentation. |
| 118 command_spec = { | 133 command_spec = Command.CreateCommandSpec( |
| 119 # Name of command. | 134 'rm', |
| 120 COMMAND_NAME : 'rm', | 135 command_name_aliases=['del', 'delete', 'remove'], |
| 121 # List of command name aliases. | 136 min_args=1, |
| 122 COMMAND_NAME_ALIASES : ['del', 'delete', 'remove'], | 137 max_args=NO_MAX, |
| 123 # Min number of args required by this command. | 138 supported_sub_args='afrR', |
| 124 MIN_ARGS : 1, | 139 file_url_ok=False, |
| 125 # Max number of args required by this command, or NO_MAX. | 140 provider_url_ok=False, |
| 126 MAX_ARGS : NO_MAX, | 141 urls_start_arg=0, |
| 127 # Getopt-style string specifying acceptable sub args. | 142 gs_api_support=[ApiSelector.XML, ApiSelector.JSON], |
| 128 SUPPORTED_SUB_ARGS : 'afrRv', | 143 gs_default_api=ApiSelector.JSON, |
| 129 # True if file URIs acceptable for this command. | 144 ) |
| 130 FILE_URIS_OK : False, | 145 # Help specification. See help_provider.py for documentation. |
| 131 # True if provider-only URIs acceptable for this command. | 146 help_spec = Command.HelpSpec( |
| 132 PROVIDER_URIS_OK : False, | 147 help_name='rm', |
| 133 # Index in args of first URI arg. | 148 help_name_aliases=['del', 'delete', 'remove'], |
| 134 URIS_START_ARG : 0, | 149 help_type='command_help', |
| 135 } | 150 help_one_line_summary='Remove objects', |
| 136 help_spec = { | 151 help_text=_DETAILED_HELP_TEXT, |
| 137 # Name of command or auxiliary help info for which this help applies. | 152 subcommand_help_text={}, |
| 138 HELP_NAME : 'rm', | 153 ) |
| 139 # List of help name aliases. | |
| 140 HELP_NAME_ALIASES : ['del', 'delete', 'remove'], | |
| 141 # Type of help: | |
| 142 HELP_TYPE : HelpType.COMMAND_HELP, | |
| 143 # One line summary of this help. | |
| 144 HELP_ONE_LINE_SUMMARY : 'Remove objects', | |
| 145 # The full help text. | |
| 146 HELP_TEXT : _detailed_help_text, | |
| 147 } | |
| 148 | 154 |
| 149 # Command entry point. | |
| 150 def RunCommand(self): | 155 def RunCommand(self): |
| 151 # self.recursion_requested initialized in command.py (so can be checked | 156 """Command entry point for the rm command.""" |
| 152 # in parent class for all commands). | 157 # self.recursion_requested is initialized in command.py (so it can be |
| 158 # checked in parent class for all commands). |
| 153 self.continue_on_error = False | 159 self.continue_on_error = False |
| 154 self.all_versions = False | 160 self.all_versions = False |
| 155 if self.sub_opts: | 161 if self.sub_opts: |
| 156 for o, unused_a in self.sub_opts: | 162 for o, unused_a in self.sub_opts: |
| 157 if o == '-a': | 163 if o == '-a': |
| 158 self.all_versions = True | 164 self.all_versions = True |
| 159 elif o == '-f': | 165 elif o == '-f': |
| 160 self.continue_on_error = True | 166 self.continue_on_error = True |
| 161 elif o == '-r' or o == '-R': | 167 elif o == '-r' or o == '-R': |
| 162 self.recursion_requested = True | 168 self.recursion_requested = True |
| 163 elif o == '-v': | 169 self.all_versions = True |
| 164 self.logger.info('WARNING: The %s -v option is no longer' | |
| 165 ' needed, and will eventually be removed.\n' | |
| 166 % self.command_name) | |
| 167 | 170 |
| 168 if self.recursion_requested and not self.all_versions: | 171 bucket_urls_to_delete = [] |
| 169 for uri_str in self.args: | 172 bucket_strings_to_delete = [] |
| 170 # WildcardIterator returns BucketListingRefs. | 173 if self.recursion_requested: |
| 171 for blr in self.WildcardIterator(uri_str): | 174 bucket_fields = ['id'] |
| 172 uri = blr.GetUri() | 175 for url_str in self.args: |
| 173 if uri.names_bucket() and uri.get_versioning_config(): | 176 url = StorageUrlFromString(url_str) |
| 174 raise CommandException( | 177 if url.IsBucket() or url.IsProvider(): |
| 175 'Running gsutil rm -R on a bucket-only URI (%s)\nwith ' | 178 for blr in self.WildcardIterator(url_str).IterBuckets( |
| 176 'versioning enabled will not work without specifying the -a ' | 179 bucket_fields=bucket_fields): |
| 177 'flag. Please try\nagain, using:\n\tgsutil rm -Ra %s' | 180 bucket_urls_to_delete.append(blr.storage_url) |
| 178 % (uri_str,' '.join(self.args))) | 181 bucket_strings_to_delete.append(url_str) |
| 179 | 182 |
| 180 # Used to track if any files failed to be removed. | 183 # Used to track if any files failed to be removed. |
| 181 self.everything_removed_okay = True | 184 self.everything_removed_okay = True |
| 182 | 185 |
| 183 bucket_uris_to_delete = [] | 186 try: |
| 184 if self.recursion_requested: | 187 # Expand wildcards, dirs, buckets, and bucket subdirs in URLs. |
| 185 for uri_str in self.args: | 188 name_expansion_iterator = NameExpansionIterator( |
| 186 for blr in self.WildcardIterator(uri_str): | 189 self.command_name, self.debug, self.logger, self.gsutil_api, |
| 187 uri = blr.GetUri() | 190 self.args, self.recursion_requested, project_id=self.project_id, |
| 188 if uri.names_bucket(): | 191 all_versions=self.all_versions, |
| 189 bucket_uris_to_delete.append(uri) | 192 continue_on_error=self.continue_on_error or self.parallel_operations) |
| 190 | 193 |
| 191 try: | |
| 192 # Expand wildcards, dirs, buckets, and bucket subdirs in URIs. | |
| 193 name_expansion_iterator = NameExpansionIterator( | |
| 194 self.command_name, self.proj_id_handler, self.headers, self.debug, | |
| 195 self.logger, self.bucket_storage_uri_class, self.args, | |
| 196 self.recursion_requested, flat=self.recursion_requested, | |
| 197 all_versions=self.all_versions) | |
| 198 # Perform remove requests in parallel (-m) mode, if requested, using | 194 # Perform remove requests in parallel (-m) mode, if requested, using |
| 199 # configured number of parallel processes and threads. Otherwise, | 195 # configured number of parallel processes and threads. Otherwise, |
| 200 # perform requests with sequential function calls in current process. | 196 # perform requests with sequential function calls in current process. |
| 201 self.Apply(_RemoveFuncWrapper, name_expansion_iterator, | 197 self.Apply(_RemoveFuncWrapper, name_expansion_iterator, |
| 202 _RemoveExceptionHandler, | 198 _RemoveExceptionHandler, |
| 203 fail_on_error=(not self.continue_on_error)) | 199 fail_on_error=(not self.continue_on_error)) |
| 204 | 200 |
| 205 # Assuming the bucket has versioning enabled, uri's that don't map to | 201 # Assuming the bucket has versioning enabled, URLs that don't map to |
| 206 # objects should throw an error even with all_versions, since the prior | 202 # objects should throw an error even with all_versions, since the prior |
| 207 # round of deletes only sends objects to a history table. | 203 # round of deletes only sends objects to a history table. |
| 208 # This assumption that rm -a is only called for versioned buckets should be | 204 # This assumption that rm -a is only called for versioned buckets should be |
| 209 # corrected, but the fix is non-trivial. | 205 # corrected, but the fix is non-trivial. |
| 210 except CommandException as e: | 206 except CommandException as e: |
| 211 # Don't raise if there are buckets to delete -- it's valid to say: | 207 # Don't raise if there are buckets to delete -- it's valid to say: |
| 212 # gsutil rm -r gs://some_bucket | 208 # gsutil rm -r gs://some_bucket |
| 213 # if the bucket is empty. | 209 # if the bucket is empty. |
| 214 if not bucket_uris_to_delete and not self.continue_on_error: | 210 if not bucket_urls_to_delete and not self.continue_on_error: |
| 215 raise | 211 raise |
| 216 except GSResponseError, e: | 212 # Reset the failure count if we failed due to an empty bucket that we're |
| 213 # going to delete. |
| 214 msg = 'No URLs matched: ' |
| 215 if msg in str(e): |
| 216 parts = str(e).split(msg) |
| 217 if len(parts) == 2 and parts[1] in bucket_strings_to_delete: |
| 218 ResetFailureCount() |
| 219 except ServiceException, e: |
| 217 if not self.continue_on_error: | 220 if not self.continue_on_error: |
| 218 raise | 221 raise |
| 219 | 222 |
| 220 if not self.everything_removed_okay and not self.continue_on_error: | 223 if not self.everything_removed_okay and not self.continue_on_error: |
| 221 raise CommandException('Some files could not be removed.') | 224 raise CommandException('Some files could not be removed.') |
| 222 | 225 |
| 223 # If this was a gsutil rm -r command covering any bucket subdirs, | 226 # If this was a gsutil rm -r command covering any bucket subdirs, |
| 224 # remove any dir_$folder$ objects (which are created by various web UI | 227 # remove any dir_$folder$ objects (which are created by various web UI |
| 225 # tools to simulate folders). | 228 # tools to simulate folders). |
| 226 if self.recursion_requested: | 229 if self.recursion_requested: |
| 230 had_previous_failures = GetFailureCount() > 0 |
| 227 folder_object_wildcards = [] | 231 folder_object_wildcards = [] |
| 228 for uri_str in self.args: | 232 for url_str in self.args: |
| 229 uri = self.suri_builder.StorageUri(uri_str) | 233 url = StorageUrlFromString(url_str) |
| 230 if uri.names_object: | 234 if url.IsObject(): |
| 231 folder_object_wildcards.append('%s**_$folder$' % uri) | 235 folder_object_wildcards.append('%s**_$folder$' % url_str) |
| 232 if len(folder_object_wildcards): | 236 if folder_object_wildcards: |
| 233 self.continue_on_error = True | 237 self.continue_on_error = True |
| 234 try: | 238 try: |
| 235 name_expansion_iterator = NameExpansionIterator( | 239 name_expansion_iterator = NameExpansionIterator( |
| 236 self.command_name, self.proj_id_handler, self.headers, self.debug, | 240 self.command_name, self.debug, |
| 237 self.logger, self.bucket_storage_uri_class, | 241 self.logger, self.gsutil_api, |
| 238 folder_object_wildcards, self.recursion_requested, flat=True, | 242 folder_object_wildcards, self.recursion_requested, |
| 243 project_id=self.project_id, |
| 239 all_versions=self.all_versions) | 244 all_versions=self.all_versions) |
| 245 # When we're removing folder objects, always continue on error |
| 240 self.Apply(_RemoveFuncWrapper, name_expansion_iterator, | 246 self.Apply(_RemoveFuncWrapper, name_expansion_iterator, |
| 241 _RemoveExceptionHandler, | 247 _RemoveFoldersExceptionHandler, |
| 242 fail_on_error=(not self.continue_on_error)) | 248 fail_on_error=False) |
| 243 except CommandException as e: | 249 except CommandException as e: |
| 244 # Ignore exception from name expansion due to an absent folder file. | 250 # Ignore exception from name expansion due to an absent folder file. |
| 245 if not e.reason.startswith('No URIs matched:'): | 251 if not e.reason.startswith('No URLs matched:'): |
| 246 raise | 252 raise |
| 253 if not had_previous_failures: |
| 254 ResetFailureCount() |
| 247 | 255 |
| 248 # Now that all data has been deleted, delete any bucket URIs. | 256 # Now that all data has been deleted, delete any bucket URLs. |
| 249 for uri in bucket_uris_to_delete: | 257 for url in bucket_urls_to_delete: |
| 250 self.logger.info('Removing %s...', uri) | 258 self.logger.info('Removing %s...', url) |
| 251 uri.delete_bucket(self.headers) | 259 |
| 260 @Retry(NotEmptyException, tries=3, timeout_secs=1) |
| 261 def BucketDeleteWithRetry(): |
| 262 self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme) |
| 263 |
| 264 BucketDeleteWithRetry() |
| 265 |
| 252 return 0 | 266 return 0 |
| 253 | 267 |
| 254 def _RemoveFunc(self, name_expansion_result): | 268 def RemoveFunc(self, name_expansion_result, thread_state=None): |
| 255 exp_src_uri = self.suri_builder.StorageUri( | 269 gsutil_api = GetCloudApiInstance(self, thread_state=thread_state) |
| 256 name_expansion_result.GetExpandedUriStr(), | 270 |
| 257 is_latest=name_expansion_result.is_latest) | 271 exp_src_url = name_expansion_result.expanded_storage_url |
| 258 | 272 self.logger.info('Removing %s...', exp_src_url) |
| 259 self.logger.info('Removing %s...', name_expansion_result.expanded_uri_str) | 273 gsutil_api.DeleteObject( |
| 260 try: | 274 exp_src_url.bucket_name, exp_src_url.object_name, |
| 261 exp_src_uri.delete_key(validate=False, headers=self.headers) | 275 generation=exp_src_url.generation, provider=exp_src_url.scheme) |
| 262 except: | 276 |
| 263 if self.continue_on_error: | |
| 264 self.everything_removed_okay = False | |
| 265 else: | |
| 266 raise | |
| OLD | NEW |