Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(311)

Unified Diff: third_party/gsutil/gslib/commands/du.py

Issue 1377933002: [catapult] - Copy Telemetry's gsutilz over to third_party. (Closed) Base URL: https://github.com/catapult-project/catapult.git@master
Patch Set: Rename to gsutil. Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/gsutil/gslib/commands/defacl.py ('k') | third_party/gsutil/gslib/commands/hash.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/gsutil/gslib/commands/du.py
diff --git a/third_party/gsutil/gslib/commands/du.py b/third_party/gsutil/gslib/commands/du.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6b4bcb590974843ac2d695695c175efdab059fd
--- /dev/null
+++ b/third_party/gsutil/gslib/commands/du.py
@@ -0,0 +1,284 @@
+# -*- coding: utf-8 -*-
+# Copyright 2013 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Implementation of Unix-like du command for cloud storage providers."""
+
+from __future__ import absolute_import
+
+import sys
+
+from gslib.boto_translation import S3_DELETE_MARKER_GUID
+from gslib.bucket_listing_ref import BucketListingObject
+from gslib.command import Command
+from gslib.command_argument import CommandArgument
+from gslib.cs_api_map import ApiSelector
+from gslib.exception import CommandException
+from gslib.ls_helper import LsHelper
+from gslib.storage_url import ContainsWildcard
+from gslib.storage_url import StorageUrlFromString
+from gslib.util import MakeHumanReadable
+from gslib.util import NO_MAX
+from gslib.util import UTF8
+
+_SYNOPSIS = """
+ gsutil du url...
+"""
+
+_DETAILED_HELP_TEXT = ("""
+<B>SYNOPSIS</B>
+""" + _SYNOPSIS + """
+
+
+<B>DESCRIPTION</B>
+ The du command displays the amount of space (in bytes) being used by the
+ objects in the file or object hierarchy under a given URL. The syntax emulates
+ the Linux du command (which stands for disk usage). For example, the command:
+
+ gsutil du -s gs://your-bucket/dir
+
+ will report the total space used by all objects under gs://your-bucket/dir and
+ any sub-directories.
+
+
+<B>OPTIONS</B>
+ -0 Ends each output line with a 0 byte rather than a newline. This
+ can be useful to make the output more easily machine-readable.
+
+ -a Includes non-current object versions / generations in the listing
+ (only useful with a versioning-enabled bucket). Also prints
+ generation and metageneration for each listed object.
+
+ -c Produce a grand total.
+
+ -e A pattern to exclude from reporting. Example: -e "*.o" would
+ exclude any object that ends in ".o". Can be specified multiple
+ times.
+
+ -h Prints object sizes in human-readable format (e.g., 1 KiB,
+ 234 MiB, 2GiB, etc.)
+
+ -s Display only a summary total for each argument.
+
+ -X Similar to -e, but excludes patterns from the given file. The
+ patterns to exclude should be one per line.
+
+
+<B>EXAMPLES</B>
+ To list the size of all objects in a bucket:
+
+ gsutil du gs://bucketname
+
+ To list the size of all objects underneath a prefix:
+
+ gsutil du gs://bucketname/prefix/*
+
+ To print the total number of bytes in a bucket, in human-readable form:
+
+ gsutil du -ch gs://bucketname
+
+ To see a summary of the total bytes in the two given buckets:
+
+ gsutil du -s gs://bucket1 gs://bucket2
+
+ To list the size of all objects in a versioned bucket, including objects that
+ are not the latest:
+
+ gsutil du -a gs://bucketname
+
+ To list all objects in a bucket, except objects that end in ".bak",
+ with each object printed ending in a null byte:
+
+ gsutil du -e "*.bak" -0 gs://bucketname
+
+ To get a total of all buckets in a project with a grand total for an entire
+ project:
+
+ gsutil -o GSUtil:default_project_id=project-name du -shc
+""")
+
+
+class DuCommand(Command):
+ """Implementation of gsutil du command."""
+
+ # Command specification. See base class for documentation.
+ command_spec = Command.CreateCommandSpec(
+ 'du',
+ command_name_aliases=[],
+ usage_synopsis=_SYNOPSIS,
+ min_args=0,
+ max_args=NO_MAX,
+ supported_sub_args='0ace:hsX:',
+ file_url_ok=False,
+ provider_url_ok=True,
+ urls_start_arg=0,
+ gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
+ gs_default_api=ApiSelector.JSON,
+ argparse_arguments=[
+ CommandArgument.MakeZeroOrMoreCloudURLsArgument()
+ ]
+ )
+ # Help specification. See help_provider.py for documentation.
+ help_spec = Command.HelpSpec(
+ help_name='du',
+ help_name_aliases=[],
+ help_type='command_help',
+ help_one_line_summary='Display object size usage',
+ help_text=_DETAILED_HELP_TEXT,
+ subcommand_help_text={},
+ )
+
+ def _PrintSummaryLine(self, num_bytes, name):
+ size_string = (MakeHumanReadable(num_bytes)
+ if self.human_readable else str(num_bytes))
+ sys.stdout.write('%(size)-10s %(name)s%(ending)s' % {
+ 'size': size_string, 'name': name, 'ending': self.line_ending})
+
+ def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref):
+ """Print listing info for given bucket_listing_ref.
+
+ Args:
+ bucket_listing_ref: BucketListing being listed.
+
+ Returns:
+ Tuple (number of objects, object size)
+
+ Raises:
+ Exception: if calling bug encountered.
+ """
+ obj = bucket_listing_ref.root_object
+ url_str = bucket_listing_ref.url_string
+ if (obj.metadata and S3_DELETE_MARKER_GUID in
+ obj.metadata.additionalProperties):
+ size_string = '0'
+ num_bytes = 0
+ num_objs = 0
+ url_str += '<DeleteMarker>'
+ else:
+ size_string = (MakeHumanReadable(obj.size)
+ if self.human_readable else str(obj.size))
+ num_bytes = obj.size
+ num_objs = 1
+
+ if not self.summary_only:
+ sys.stdout.write('%(size)-10s %(url)s%(ending)s' % {
+ 'size': size_string,
+ 'url': url_str.encode(UTF8),
+ 'ending': self.line_ending})
+
+ return (num_objs, num_bytes)
+
+ def RunCommand(self):
+ """Command entry point for the du command."""
+ self.line_ending = '\n'
+ self.all_versions = False
+ self.produce_total = False
+ self.human_readable = False
+ self.summary_only = False
+ self.exclude_patterns = []
+ if self.sub_opts:
+ for o, a in self.sub_opts:
+ if o == '-0':
+ self.line_ending = '\0'
+ elif o == '-a':
+ self.all_versions = True
+ elif o == '-c':
+ self.produce_total = True
+ elif o == '-e':
+ self.exclude_patterns.append(a)
+ elif o == '-h':
+ self.human_readable = True
+ elif o == '-s':
+ self.summary_only = True
+ elif o == '-X':
+ if a == '-':
+ f = sys.stdin
+ else:
+ f = open(a, 'r')
+ try:
+ for line in f:
+ line = line.strip()
+ if line:
+ self.exclude_patterns.append(line)
+ finally:
+ f.close()
+
+ if not self.args:
+ # Default to listing all gs buckets.
+ self.args = ['gs://']
+
+ total_bytes = 0
+ got_nomatch_errors = False
+
+ def _PrintObjectLong(blr):
+ return self._PrintInfoAboutBucketListingRef(blr)
+
+ def _PrintNothing(unused_blr=None):
+ pass
+
+ def _PrintDirectory(num_bytes, name):
+ if not self.summary_only:
+ self._PrintSummaryLine(num_bytes, name)
+
+ for url_arg in self.args:
+ top_level_storage_url = StorageUrlFromString(url_arg)
+ if top_level_storage_url.IsFileUrl():
+ raise CommandException('Only cloud URLs are supported for %s'
+ % self.command_name)
+ bucket_listing_fields = ['size']
+
+ ls_helper = LsHelper(
+ self.WildcardIterator, self.logger,
+ print_object_func=_PrintObjectLong, print_dir_func=_PrintNothing,
+ print_dir_header_func=_PrintNothing,
+ print_dir_summary_func=_PrintDirectory,
+ print_newline_func=_PrintNothing, all_versions=self.all_versions,
+ should_recurse=True, exclude_patterns=self.exclude_patterns,
+ fields=bucket_listing_fields)
+
+ # ls_helper expands to objects and prefixes, so perform a top-level
+ # expansion first.
+ if top_level_storage_url.IsProvider():
+ # Provider URL: use bucket wildcard to iterate over all buckets.
+ top_level_iter = self.WildcardIterator(
+ '%s://*' % top_level_storage_url.scheme).IterBuckets(
+ bucket_fields=['id'])
+ elif top_level_storage_url.IsBucket():
+ top_level_iter = self.WildcardIterator(
+ '%s://%s' % (top_level_storage_url.scheme,
+ top_level_storage_url.bucket_name)).IterBuckets(
+ bucket_fields=['id'])
+ else:
+ top_level_iter = [BucketListingObject(top_level_storage_url)]
+
+ for blr in top_level_iter:
+ storage_url = blr.storage_url
+ if storage_url.IsBucket() and self.summary_only:
+ storage_url = StorageUrlFromString(
+ storage_url.CreatePrefixUrl(wildcard_suffix='**'))
+ _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url)
+ if (storage_url.IsObject() and exp_objs == 0 and
+ ContainsWildcard(url_arg) and not self.exclude_patterns):
+ got_nomatch_errors = True
+ total_bytes += exp_bytes
+
+ if self.summary_only:
+ self._PrintSummaryLine(exp_bytes, blr.url_string.rstrip('/'))
+
+ if self.produce_total:
+ self._PrintSummaryLine(total_bytes, 'total')
+
+ if got_nomatch_errors:
+ raise CommandException('One or more URLs matched no objects.')
+
+ return 0
« no previous file with comments | « third_party/gsutil/gslib/commands/defacl.py ('k') | third_party/gsutil/gslib/commands/hash.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698