Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(321)

Unified Diff: third_party/gsutil/gslib/commands/rm.py

Issue 1377933002: [catapult] - Copy Telemetry's gsutilz over to third_party. (Closed) Base URL: https://github.com/catapult-project/catapult.git@master
Patch Set: Rename to gsutil. Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/gsutil/gslib/commands/rb.py ('k') | third_party/gsutil/gslib/commands/rsync.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/gsutil/gslib/commands/rm.py
diff --git a/third_party/gsutil/gslib/commands/rm.py b/third_party/gsutil/gslib/commands/rm.py
new file mode 100644
index 0000000000000000000000000000000000000000..721314adbd4b30fdfdddb70b34f73d08009eafa4
--- /dev/null
+++ b/third_party/gsutil/gslib/commands/rm.py
@@ -0,0 +1,326 @@
+# -*- coding: utf-8 -*-
+# Copyright 2011 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Implementation of Unix-like rm command for cloud storage providers."""
+
+from __future__ import absolute_import
+
+from gslib.cloud_api import NotEmptyException
+from gslib.cloud_api import ServiceException
+from gslib.command import Command
+from gslib.command import GetFailureCount
+from gslib.command import ResetFailureCount
+from gslib.command_argument import CommandArgument
+from gslib.cs_api_map import ApiSelector
+from gslib.exception import CommandException
+from gslib.name_expansion import NameExpansionIterator
+from gslib.storage_url import StorageUrlFromString
+from gslib.translation_helper import PreconditionsFromHeaders
+from gslib.util import GetCloudApiInstance
+from gslib.util import NO_MAX
+from gslib.util import Retry
+from gslib.util import StdinIterator
+
+
+# Usage synopsis for the rm command. It is embedded in _DETAILED_HELP_TEXT
+# below and also passed as usage_synopsis in RmCommand.command_spec.
+_SYNOPSIS = """
+ gsutil rm [-f] [-r] url...
+ gsutil rm [-f] [-r] -I
+"""
+
+_DETAILED_HELP_TEXT = ("""
+<B>SYNOPSIS</B>
+""" + _SYNOPSIS + """
+
+
+<B>DESCRIPTION</B>
+ The gsutil rm command removes objects.
+ For example, the command:
+
+ gsutil rm gs://bucket/subdir/*
+
+ will remove all objects in gs://bucket/subdir, but not in any of its
+ sub-directories. In contrast:
+
+ gsutil rm gs://bucket/subdir/**
+
+ will remove all objects under gs://bucket/subdir or any of its
+ subdirectories.
+
+ You can also use the -r option to specify recursive object deletion. Thus, for
+ example, either of the following two commands will remove gs://bucket/subdir
+ and all objects and subdirectories under it:
+
+ gsutil rm gs://bucket/subdir**
+ gsutil rm -r gs://bucket/subdir
+
+ The -r option will also delete all object versions in the subdirectory for
+ versioning-enabled buckets, whereas the ** command will only delete the live
+ version of each object in the subdirectory.
+
+ Running gsutil rm -r on a bucket will delete all versions of all objects in
+ the bucket, and then delete the bucket:
+
+ gsutil rm -r gs://bucket
+
+ If you want to delete all objects in the bucket, but not the bucket itself,
+ this command will work:
+
+ gsutil rm gs://bucket/**
+
+ If you have a large number of objects to remove you might want to use the
+ gsutil -m option, to perform parallel (multi-threaded/multi-processing)
+ removes:
+
+ gsutil -m rm -r gs://my_bucket/subdir
+
+ You can pass a list of URLs (one per line) to remove on stdin instead of as
+ command line arguments by using the -I option. This allows you to use gsutil
+ in a pipeline to remove objects identified by a program, such as:
+
+ some_program | gsutil -m rm -I
+
+ The contents of stdin can name cloud URLs and wildcards of cloud URLs.
+
+ Note that gsutil rm will refuse to remove files from the local
+ file system. For example this will fail:
+
+ gsutil rm *.txt
+
+ WARNING: Object removal cannot be undone. Google Cloud Storage is designed
+ to give developers a high amount of flexibility and control over their data,
+ and Google maintains strict controls over the processing and purging of
+ deleted data. To protect yourself from mistakes, you can configure object
+ versioning on your bucket(s). See 'gsutil help versions' for details.
+
+
+<B>DATA RESTORATION FROM ACCIDENTAL DELETION OR OVERWRITES</B>
+Google Cloud Storage does not provide support for restoring data lost
+or overwritten due to customer errors. If you have concerns that your
+application software (or your users) may at some point erroneously delete or
+overwrite data, you can protect yourself from that risk by enabling Object
+Versioning (see "gsutil help versioning"). Doing so increases storage costs,
+which can be partially mitigated by configuring Lifecycle Management to delete
+older object versions (see "gsutil help lifecycle").
+
+
+<B>OPTIONS</B>
+ -f Continues silently (without printing error messages) despite
+ errors when removing multiple objects. If some of the objects
+ could not be removed, gsutil's exit status will be non-zero even
+ if this flag is set. This option is implicitly set when running
+ "gsutil -m rm ...".
+
+ -I Causes gsutil to read the list of objects to remove from stdin.
+ This allows you to run a program that generates the list of
+ objects to remove.
+
+ -R, -r Causes bucket or bucket subdirectory contents (all objects and
+ subdirectories that it contains) to be removed recursively. If
+ used with a bucket-only URL (like gs://bucket), after deleting
+ objects and subdirectories gsutil will delete the bucket. The -r
+ flag implies the -a flag and will delete all object versions.
+
+ -a Delete all versions of an object.
+""")
+
+
+def _RemoveExceptionHandler(cls, e):
+ """Simple exception handler to allow post-completion status.
+
+ Passed to Apply() as the exception handler for object removals. Logs the
+ error through cls.logger unless cls.continue_on_error is set, and clears
+ cls.everything_removed_okay so that RunCommand can report the failure after
+ the removals have been attempted.
+
+ Args:
+ cls: Command instance whose logger and status flags are used.
+ e: The exception raised while removing an object.
+ """
+ if not cls.continue_on_error:
+ cls.logger.error(str(e))
+ # NOTE(review): indentation is flattened in this diff rendering; this reset
+ # is presumably unconditional (outside the `if` above) -- confirm against
+ # the checked-in file.
+ cls.everything_removed_okay = False
+
+
+# pylint: disable=unused-argument
+def _RemoveFoldersExceptionHandler(cls, e):
+ """When removing folders, we don't mind if none exist."""
+ if (isinstance(e, CommandException.__class__) and
+ 'No URLs matched' in e.message):
+ pass
+ else:
+ raise e
+
+
+def _RemoveFuncWrapper(cls, name_expansion_result, thread_state=None):
+ """Module-level function handed to Apply(); delegates to cls.RemoveFunc.
+
+ Args:
+ cls: Command instance providing RemoveFunc.
+ name_expansion_result: Name expansion result forwarded to RemoveFunc.
+ thread_state: Forwarded unchanged to RemoveFunc.
+ """
+ cls.RemoveFunc(name_expansion_result, thread_state=thread_state)
+
+
+class RmCommand(Command):
+ """Implementation of gsutil rm command."""
+
+ # Command specification. See base class for documentation.
+ command_spec = Command.CreateCommandSpec(
+ 'rm',
+ command_name_aliases=['del', 'delete', 'remove'],
+ usage_synopsis=_SYNOPSIS,
+ min_args=0,
+ max_args=NO_MAX,
+ supported_sub_args='afIrR',
+ file_url_ok=False,
+ provider_url_ok=False,
+ urls_start_arg=0,
+ gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
+ gs_default_api=ApiSelector.JSON,
+ argparse_arguments=[
+ CommandArgument.MakeZeroOrMoreCloudURLsArgument()
+ ]
+ )
+ # Help specification. See help_provider.py for documentation.
+ help_spec = Command.HelpSpec(
+ help_name='rm',
+ help_name_aliases=['del', 'delete', 'remove'],
+ help_type='command_help',
+ help_one_line_summary='Remove objects',
+ help_text=_DETAILED_HELP_TEXT,
+ subcommand_help_text={},
+ )
+
+ def RunCommand(self):
+ """Command entry point for the rm command."""
+ # self.recursion_requested is initialized in command.py (so it can be
+ # checked in parent class for all commands).
+ self.continue_on_error = False
+ self.read_args_from_stdin = False
+ self.all_versions = False
+ if self.sub_opts:
+ for o, unused_a in self.sub_opts:
+ if o == '-a':
+ self.all_versions = True
+ elif o == '-f':
+ self.continue_on_error = True
+ elif o == '-I':
+ self.read_args_from_stdin = True
+ elif o == '-r' or o == '-R':
+ self.recursion_requested = True
+ self.all_versions = True
+
+ if self.read_args_from_stdin:
+ if self.args:
+ raise CommandException('No arguments allowed with the -I flag.')
+ url_strs = StdinIterator()
+ else:
+ if not self.args:
+ raise CommandException('The rm command (without -I) expects at '
+ 'least one URL.')
+ url_strs = self.args
+
+ bucket_urls_to_delete = []
+ bucket_strings_to_delete = []
+ if self.recursion_requested:
+ bucket_fields = ['id']
+ for url_str in url_strs:
+ url = StorageUrlFromString(url_str)
+ if url.IsBucket() or url.IsProvider():
+ for blr in self.WildcardIterator(url_str).IterBuckets(
+ bucket_fields=bucket_fields):
+ bucket_urls_to_delete.append(blr.storage_url)
+ bucket_strings_to_delete.append(url_str)
+
+ self.preconditions = PreconditionsFromHeaders(self.headers or {})
+
+ # Used to track if any files failed to be removed.
+ self.everything_removed_okay = True
+
+ try:
+ # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
+ name_expansion_iterator = NameExpansionIterator(
+ self.command_name, self.debug, self.logger, self.gsutil_api,
+ url_strs, self.recursion_requested, project_id=self.project_id,
+ all_versions=self.all_versions,
+ continue_on_error=self.continue_on_error or self.parallel_operations)
+
+ # Perform remove requests in parallel (-m) mode, if requested, using
+ # configured number of parallel processes and threads. Otherwise,
+ # perform requests with sequential function calls in current process.
+ self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
+ _RemoveExceptionHandler,
+ fail_on_error=(not self.continue_on_error))
+
+ # Assuming the bucket has versioning enabled, url's that don't map to
+ # objects should throw an error even with all_versions, since the prior
+ # round of deletes only sends objects to a history table.
+ # This assumption that rm -a is only called for versioned buckets should be
+ # corrected, but the fix is non-trivial.
+ except CommandException as e:
+ # Don't raise if there are buckets to delete -- it's valid to say:
+ # gsutil rm -r gs://some_bucket
+ # if the bucket is empty.
+ if not bucket_urls_to_delete and not self.continue_on_error:
+ raise
+ # Reset the failure count if we failed due to an empty bucket that we're
+ # going to delete.
+ msg = 'No URLs matched: '
+ if msg in str(e):
+ parts = str(e).split(msg)
+ if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
+ ResetFailureCount()
+ except ServiceException, e:
+ if not self.continue_on_error:
+ raise
+
+ if not self.everything_removed_okay and not self.continue_on_error:
+ raise CommandException('Some files could not be removed.')
+
+ # If this was a gsutil rm -r command covering any bucket subdirs,
+ # remove any dir_$folder$ objects (which are created by various web UI
+ # tools to simulate folders).
+ if self.recursion_requested:
+ had_previous_failures = GetFailureCount() > 0
+ folder_object_wildcards = []
+ for url_str in url_strs:
+ url = StorageUrlFromString(url_str)
+ if url.IsObject():
+ folder_object_wildcards.append('%s**_$folder$' % url_str)
+ if folder_object_wildcards:
+ self.continue_on_error = True
+ try:
+ name_expansion_iterator = NameExpansionIterator(
+ self.command_name, self.debug,
+ self.logger, self.gsutil_api,
+ folder_object_wildcards, self.recursion_requested,
+ project_id=self.project_id,
+ all_versions=self.all_versions)
+ # When we're removing folder objects, always continue on error
+ self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
+ _RemoveFoldersExceptionHandler,
+ fail_on_error=False)
+ except CommandException as e:
+ # Ignore exception from name expansion due to an absent folder file.
+ if not e.reason.startswith('No URLs matched:'):
+ raise
+ if not had_previous_failures:
+ ResetFailureCount()
+
+ # Now that all data has been deleted, delete any bucket URLs.
+ for url in bucket_urls_to_delete:
+ self.logger.info('Removing %s...', url)
+
+ @Retry(NotEmptyException, tries=3, timeout_secs=1)
+ def BucketDeleteWithRetry():
+ self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)
+
+ BucketDeleteWithRetry()
+
+ return 0
+
+ def RemoveFunc(self, name_expansion_result, thread_state=None):
+ gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
+
+ exp_src_url = name_expansion_result.expanded_storage_url
+ self.logger.info('Removing %s...', exp_src_url)
+ gsutil_api.DeleteObject(
+ exp_src_url.bucket_name, exp_src_url.object_name,
+ preconditions=self.preconditions, generation=exp_src_url.generation,
+ provider=exp_src_url.scheme)
+
« no previous file with comments | « third_party/gsutil/gslib/commands/rb.py ('k') | third_party/gsutil/gslib/commands/rsync.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698