Index: third_party/gsutil/gslib/commands/rm.py |
diff --git a/third_party/gsutil/gslib/commands/rm.py b/third_party/gsutil/gslib/commands/rm.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..721314adbd4b30fdfdddb70b34f73d08009eafa4 |
--- /dev/null |
+++ b/third_party/gsutil/gslib/commands/rm.py |
@@ -0,0 +1,326 @@ |
+# -*- coding: utf-8 -*- |
+# Copyright 2011 Google Inc. All Rights Reserved. |
+# |
+# Licensed under the Apache License, Version 2.0 (the "License"); |
+# you may not use this file except in compliance with the License. |
+# You may obtain a copy of the License at |
+# |
+# http://www.apache.org/licenses/LICENSE-2.0 |
+# |
+# Unless required by applicable law or agreed to in writing, software |
+# distributed under the License is distributed on an "AS IS" BASIS, |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
+# See the License for the specific language governing permissions and |
+# limitations under the License. |
+"""Implementation of Unix-like rm command for cloud storage providers.""" |
+ |
+from __future__ import absolute_import |
+ |
+from gslib.cloud_api import NotEmptyException |
+from gslib.cloud_api import ServiceException |
+from gslib.command import Command |
+from gslib.command import GetFailureCount |
+from gslib.command import ResetFailureCount |
+from gslib.command_argument import CommandArgument |
+from gslib.cs_api_map import ApiSelector |
+from gslib.exception import CommandException |
+from gslib.name_expansion import NameExpansionIterator |
+from gslib.storage_url import StorageUrlFromString |
+from gslib.translation_helper import PreconditionsFromHeaders |
+from gslib.util import GetCloudApiInstance |
+from gslib.util import NO_MAX |
+from gslib.util import Retry |
+from gslib.util import StdinIterator |
+ |
+ |
# Usage lines for the rm command. Passed to the command spec as
# usage_synopsis and embedded in the SYNOPSIS section of the detailed help.
_SYNOPSIS = """
  gsutil rm [-f] [-r] url...
  gsutil rm [-f] [-r] -I
"""

# Long-form help for the rm command, passed to the help spec as help_text.
# <B>...</B> marks section headings; section bodies are indented two spaces
# and example command lines four spaces.
_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """


<B>DESCRIPTION</B>
  The gsutil rm command removes objects.
  For example, the command:

    gsutil rm gs://bucket/subdir/*

  will remove all objects in gs://bucket/subdir, but not in any of its
  sub-directories. In contrast:

    gsutil rm gs://bucket/subdir/**

  will remove all objects under gs://bucket/subdir or any of its
  subdirectories.

  You can also use the -r option to specify recursive object deletion. Thus, for
  example, either of the following two commands will remove gs://bucket/subdir
  and all objects and subdirectories under it:

    gsutil rm gs://bucket/subdir**
    gsutil rm -r gs://bucket/subdir

  The -r option will also delete all object versions in the subdirectory for
  versioning-enabled buckets, whereas the ** command will only delete the live
  version of each object in the subdirectory.

  Running gsutil rm -r on a bucket will delete all versions of all objects in
  the bucket, and then delete the bucket:

    gsutil rm -r gs://bucket

  If you want to delete all objects in the bucket, but not the bucket itself,
  this command will work:

    gsutil rm gs://bucket/**

  If you have a large number of objects to remove you might want to use the
  gsutil -m option, to perform parallel (multi-threaded/multi-processing)
  removes:

    gsutil -m rm -r gs://my_bucket/subdir

  You can pass a list of URLs (one per line) to remove on stdin instead of as
  command line arguments by using the -I option. This allows you to use gsutil
  in a pipeline to remove objects identified by a program, such as:

    some_program | gsutil -m rm -I

  The contents of stdin can name cloud URLs and wildcards of cloud URLs.

  Note that gsutil rm will refuse to remove files from the local
  file system. For example this will fail:

    gsutil rm *.txt

  WARNING: Object removal cannot be undone. Google Cloud Storage is designed
  to give developers a high amount of flexibility and control over their data,
  and Google maintains strict controls over the processing and purging of
  deleted data. To protect yourself from mistakes, you can configure object
  versioning on your bucket(s). See 'gsutil help versions' for details.


<B>DATA RESTORATION FROM ACCIDENTAL DELETION OR OVERWRITES</B>
  Google Cloud Storage does not provide support for restoring data lost
  or overwritten due to customer errors. If you have concerns that your
  application software (or your users) may at some point erroneously delete or
  overwrite data, you can protect yourself from that risk by enabling Object
  Versioning (see "gsutil help versioning"). Doing so increases storage costs,
  which can be partially mitigated by configuring Lifecycle Management to delete
  older object versions (see "gsutil help lifecycle").


<B>OPTIONS</B>
  -f          Continues silently (without printing error messages) despite
              errors when removing multiple objects. If some of the objects
              could not be removed, gsutil's exit status will be non-zero even
              if this flag is set. This option is implicitly set when running
              "gsutil -m rm ...".

  -I          Causes gsutil to read the list of objects to remove from stdin.
              This allows you to run a program that generates the list of
              objects to remove.

  -R, -r      Causes bucket or bucket subdirectory contents (all objects and
              subdirectories that it contains) to be removed recursively. If
              used with a bucket-only URL (like gs://bucket), after deleting
              objects and subdirectories gsutil will delete the bucket. The -r
              flag implies the -a flag and will delete all object versions.

  -a          Delete all versions of an object.
""")
+ |
+ |
def _RemoveExceptionHandler(cls, e):
  """Notes a failed removal so the command can report it once it finishes.

  Args:
    cls: Object the handler is invoked on; its continue_on_error flag and
        logger are read, and its everything_removed_okay flag is cleared.
    e: Exception raised while removing an object.
  """
  should_log = not cls.continue_on_error
  if should_log:
    message = str(e)
    cls.logger.error(message)
  # Cleared whether or not the error was logged, so the failure is still
  # visible to whoever inspects the flag afterwards.
  cls.everything_removed_okay = False
+ |
+ |
# pylint: disable=unused-argument
def _RemoveFoldersExceptionHandler(cls, e):
  """Exception handler for removing folder placeholder objects.

  When removing folders, we don't mind if none exist, so a CommandException
  reporting 'No URLs matched' is ignored. Any other exception is re-raised.

  Args:
    cls: Object the handler is invoked on (unused).
    e: Exception raised while removing a folder placeholder object.

  Raises:
    Exception: e itself, unless it is the benign 'No URLs matched' error.
  """
  # The check must be against the exception class itself:
  # CommandException.__class__ is the class's metaclass, which an exception
  # instance never is, so testing against it can never succeed. The text is
  # matched on str(e), as RunCommand does for the same message, because
  # e.message is deprecated since Python 2.6 and absent in Python 3.
  if isinstance(e, CommandException) and 'No URLs matched' in str(e):
    return
  raise e
+ |
+ |
def _RemoveFuncWrapper(cls, name_expansion_result, thread_state=None):
  """Module-level trampoline that forwards to cls.RemoveFunc.

  Args:
    cls: Object exposing a RemoveFunc(name_expansion_result, thread_state=...)
        method.
    name_expansion_result: Value forwarded unchanged as the first argument.
    thread_state: Value forwarded unchanged as the thread_state keyword.
  """
  remove = cls.RemoveFunc
  remove(name_expansion_result, thread_state=thread_state)
+ |
+ |
class RmCommand(Command):
  """Implementation of gsutil rm command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'rm',
      command_name_aliases=['del', 'delete', 'remove'],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='afIrR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='rm',
      help_name_aliases=['del', 'delete', 'remove'],
      help_type='command_help',
      help_one_line_summary='Remove objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the rm command.

    Parses the sub-options, removes every object matched by the URL arguments
    (or by the URLs read from stdin with -I) and, for recursive removals, also
    removes "_$folder$" placeholder objects and the named buckets themselves.

    Returns:
      0 on completion.

    Raises:
      CommandException: if the arguments are invalid, or if a removal failed
          and -f was not given.
      ServiceException: if raised during removal and -f was not given.
    """
    # self.recursion_requested is initialized in command.py (so it can be
    # checked in parent class for all commands).
    self.continue_on_error = False
    self.read_args_from_stdin = False
    self.all_versions = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o == '-r' or o == '-R':
          # Recursive removal implies removing every object version.
          self.recursion_requested = True
          self.all_versions = True

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      url_strs = StdinIterator()
      if self.recursion_requested:
        # Recursive removal walks url_strs several times below (bucket
        # discovery, object removal, folder-placeholder cleanup).
        # NOTE(review): presumably a stdin-backed iterator can only be
        # consumed once, so it is materialized here - confirm against
        # StdinIterator.
        url_strs = list(url_strs)
    else:
      if not self.args:
        raise CommandException('The rm command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args

    # Buckets named directly on the command line are deleted last, after their
    # contents; the matching URL strings are remembered so that an
    # "empty bucket" error for them can be recognized below.
    bucket_urls_to_delete = []
    bucket_strings_to_delete = []
    if self.recursion_requested:
      bucket_fields = ['id']
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsBucket() or url.IsProvider():
          for blr in self.WildcardIterator(url_str).IterBuckets(
              bucket_fields=bucket_fields):
            bucket_urls_to_delete.append(blr.storage_url)
            bucket_strings_to_delete.append(url_str)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    # Used to track if any files failed to be removed.
    self.everything_removed_okay = True

    try:
      # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
      name_expansion_iterator = NameExpansionIterator(
          self.command_name, self.debug, self.logger, self.gsutil_api,
          url_strs, self.recursion_requested, project_id=self.project_id,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or self.parallel_operations)

      # Perform remove requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                 _RemoveExceptionHandler,
                 fail_on_error=(not self.continue_on_error))

    # Assuming the bucket has versioning enabled, url's that don't map to
    # objects should throw an error even with all_versions, since the prior
    # round of deletes only sends objects to a history table.
    # This assumption that rm -a is only called for versioned buckets should be
    # corrected, but the fix is non-trivial.
    except CommandException as e:
      # Don't raise if there are buckets to delete -- it's valid to say:
      #   gsutil rm -r gs://some_bucket
      # if the bucket is empty.
      if not bucket_urls_to_delete and not self.continue_on_error:
        raise
      # Reset the failure count if we failed due to an empty bucket that we're
      # going to delete.
      msg = 'No URLs matched: '
      if msg in str(e):
        parts = str(e).split(msg)
        if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
          ResetFailureCount()
    except ServiceException:
      if not self.continue_on_error:
        raise

    if not self.everything_removed_okay and not self.continue_on_error:
      raise CommandException('Some files could not be removed.')

    # If this was a gsutil rm -r command covering any bucket subdirs,
    # remove any dir_$folder$ objects (which are created by various web UI
    # tools to simulate folders).
    if self.recursion_requested:
      had_previous_failures = GetFailureCount() > 0
      folder_object_wildcards = [
          '%s**_$folder$' % url_str for url_str in url_strs
          if StorageUrlFromString(url_str).IsObject()]
      if folder_object_wildcards:
        self.continue_on_error = True
        try:
          name_expansion_iterator = NameExpansionIterator(
              self.command_name, self.debug,
              self.logger, self.gsutil_api,
              folder_object_wildcards, self.recursion_requested,
              project_id=self.project_id,
              all_versions=self.all_versions)
          # When we're removing folder objects, always continue on error
          self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                     _RemoveFoldersExceptionHandler,
                     fail_on_error=False)
        except CommandException as e:
          # Ignore exception from name expansion due to an absent folder file.
          if not e.reason.startswith('No URLs matched:'):
            raise
        # Failures in this cleanup pass should not affect the recorded
        # failure count when the main removal pass had none.
        if not had_previous_failures:
          ResetFailureCount()

    # Now that all data has been deleted, delete any bucket URLs.
    for url in bucket_urls_to_delete:
      self.logger.info('Removing %s...', url)

      # Retried on NotEmptyException (up to 3 tries). The closure reads the
      # loop variable, which is safe because it is called before the next
      # iteration rebinds it.
      @Retry(NotEmptyException, tries=3, timeout_secs=1)
      def BucketDeleteWithRetry():
        self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)

      BucketDeleteWithRetry()

    return 0

  def RemoveFunc(self, name_expansion_result, thread_state=None):
    """Removes the single object described by a name expansion result.

    Args:
      name_expansion_result: Result whose expanded_storage_url identifies the
          object (bucket name, object name, generation and scheme) to delete.
      thread_state: Passed to GetCloudApiInstance to obtain the API instance
          used for the delete call.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    exp_src_url = name_expansion_result.expanded_storage_url
    self.logger.info('Removing %s...', exp_src_url)
    gsutil_api.DeleteObject(
        exp_src_url.bucket_name, exp_src_url.object_name,
        preconditions=self.preconditions, generation=exp_src_url.generation,
        provider=exp_src_url.scheme)
+ |