Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(257)

Unified Diff: gslib/commands/du.py

Issue 698893003: Update checked in version of gsutil to version 4.6 (Closed) Base URL: http://dart.googlecode.com/svn/third_party/gsutil/
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « gslib/commands/defacl.py ('k') | gslib/commands/hash.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: gslib/commands/du.py
===================================================================
--- gslib/commands/du.py (revision 33376)
+++ gslib/commands/du.py (working copy)
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -11,46 +12,40 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+"""Implementation of Unix-like du command for cloud storage providers."""
-import fnmatch
+from __future__ import absolute_import
+
import sys
-from boto.s3.deletemarker import DeleteMarker
-from gslib.bucket_listing_ref import BucketListingRef
+from gslib.boto_translation import S3_DELETE_MARKER_GUID
+from gslib.bucket_listing_ref import BucketListingObject
from gslib.command import Command
-from gslib.command import COMMAND_NAME
-from gslib.command import COMMAND_NAME_ALIASES
-from gslib.command import FILE_URIS_OK
-from gslib.command import MAX_ARGS
-from gslib.command import MIN_ARGS
-from gslib.command import PROVIDER_URIS_OK
-from gslib.command import SUPPORTED_SUB_ARGS
-from gslib.command import URIS_START_ARG
-from gslib.commands.ls import UriOnlyBlrExpansionIterator
-from gslib.commands.ls import UriStrForObj
+from gslib.cs_api_map import ApiSelector
from gslib.exception import CommandException
-from gslib.help_provider import HELP_NAME
-from gslib.help_provider import HELP_NAME_ALIASES
-from gslib.help_provider import HELP_ONE_LINE_SUMMARY
-from gslib.help_provider import HELP_TEXT
-from gslib.help_provider import HelpType
-from gslib.help_provider import HELP_TYPE
-from gslib.plurality_checkable_iterator import PluralityCheckableIterator
+from gslib.ls_helper import LsHelper
+from gslib.storage_url import ContainsWildcard
+from gslib.storage_url import StorageUrlFromString
from gslib.util import MakeHumanReadable
from gslib.util import NO_MAX
-from gslib.wildcard_iterator import ContainsWildcard
+from gslib.util import UTF8
-_detailed_help_text = ("""
+_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
- gsutil du uri...
+ gsutil du url...
<B>DESCRIPTION</B>
The du command displays the amount of space (in bytes) being used by the
- objects for a given URI. The syntax emulates the Linux du command (which
- stands for disk usage).
+ objects in the file or object hierarchy under a given URL. The syntax emulates
+ the Linux du command (which stands for disk usage). For example, the command:
+ gsutil du -s gs://your-bucket/dir
+ will report the total space used by all objects under gs://your-bucket/dir and
+ any sub-directories.
+
+
<B>OPTIONS</B>
-0 Ends each output line with a 0 byte rather than a newline. This
can be useful to make the output more easily machine-readable.
@@ -101,48 +96,44 @@
gsutil du -e "*.bak" -0 gs://bucketname
+ To get a total of all buckets in a project with a grand total for an entire
+ project:
+
+ gsutil -o GSUtil:default_project_id=project-name du -shc
""")
+
class DuCommand(Command):
"""Implementation of gsutil du command."""
- # Command specification (processed by parent class).
- command_spec = {
- # Name of command.
- COMMAND_NAME : 'du',
- # List of command name aliases.
- COMMAND_NAME_ALIASES : [],
- # Min number of args required by this command.
- MIN_ARGS : 0,
- # Max number of args required by this command, or NO_MAX.
- MAX_ARGS : NO_MAX,
- # Getopt-style string specifying acceptable sub args.
- SUPPORTED_SUB_ARGS : '0ace:hsX:',
- # True if file URIs acceptable for this command.
- FILE_URIS_OK : False,
- # True if provider-only URIs acceptable for this command.
- PROVIDER_URIS_OK : True,
- # Index in args of first URI arg.
- URIS_START_ARG : 0,
- }
- help_spec = {
- # Name of command or auxiliary help info for which this help applies.
- HELP_NAME : 'du',
- # List of help name aliases.
- HELP_NAME_ALIASES : [],
- # Type of help:
- HELP_TYPE : HelpType.COMMAND_HELP,
- # One line summary of this help.
- HELP_ONE_LINE_SUMMARY : 'Display object size usage',
- # The full help text.
- HELP_TEXT : _detailed_help_text,
- }
+ # Command specification. See base class for documentation.
+ command_spec = Command.CreateCommandSpec(
+ 'du',
+ command_name_aliases=[],
+ min_args=0,
+ max_args=NO_MAX,
+ supported_sub_args='0ace:hsX:',
+ file_url_ok=False,
+ provider_url_ok=True,
+ urls_start_arg=0,
+ gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
+ gs_default_api=ApiSelector.JSON,
+ )
+ # Help specification. See help_provider.py for documentation.
+ help_spec = Command.HelpSpec(
+ help_name='du',
+ help_name_aliases=[],
+ help_type='command_help',
+ help_one_line_summary='Display object size usage',
+ help_text=_DETAILED_HELP_TEXT,
+ subcommand_help_text={},
+ )
def _PrintSummaryLine(self, num_bytes, name):
size_string = (MakeHumanReadable(num_bytes)
if self.human_readable else str(num_bytes))
sys.stdout.write('%(size)-10s %(name)s%(ending)s' % {
- 'size': size_string, 'name': name, 'ending': self.line_ending})
+ 'size': size_string, 'name': name, 'ending': self.line_ending})
def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref):
"""Print listing info for given bucket_listing_ref.
@@ -156,91 +147,30 @@
Raises:
Exception: if calling bug encountered.
"""
- uri = bucket_listing_ref.GetUri()
- obj = bucket_listing_ref.GetKey()
- uri_str = UriStrForObj(uri, obj, self.all_versions)
-
- if isinstance(obj, DeleteMarker):
+ obj = bucket_listing_ref.root_object
+ url_str = bucket_listing_ref.url_string
+ if (obj.metadata and S3_DELETE_MARKER_GUID in
+ obj.metadata.additionalProperties):
size_string = '0'
- numobjs = 0
- numbytes = 0
+ num_bytes = 0
+ num_objs = 0
+ url_str += '<DeleteMarker>'
else:
size_string = (MakeHumanReadable(obj.size)
if self.human_readable else str(obj.size))
- numobjs = 1
- numbytes = obj.size
+ num_bytes = obj.size
+ num_objs = 1
if not self.summary_only:
- sys.stdout.write('%(size)-10s %(uri)s%(ending)s' % {
+ sys.stdout.write('%(size)-10s %(url)s%(ending)s' % {
'size': size_string,
- 'uri': uri_str.encode('utf-8'),
+ 'url': url_str.encode(UTF8),
'ending': self.line_ending})
- return numobjs, numbytes
+ return (num_objs, num_bytes)
- def _RecursePrint(self, blr):
- """
- Expands a bucket listing reference and recurses to its children, calling
- _PrintInfoAboutBucketListingRef for each expanded object found.
-
- Args:
- blr: An instance of BucketListingRef.
-
- Returns:
- Tuple containing (number of object, total number of bytes)
- """
- num_bytes = 0
- num_objs = 0
-
- if blr.HasKey():
- blr_iterator = iter([blr])
- elif blr.HasPrefix():
- blr_iterator = self.WildcardIterator(
- '%s/*' % blr.GetRStrippedUriString(), all_versions=self.all_versions)
- elif blr.NamesBucket():
- blr_iterator = self.WildcardIterator(
- '%s*' % blr.GetUriString(), all_versions=self.all_versions)
- else:
- # This BLR didn't come from a bucket listing. This case happens for
- # BLR's instantiated from a user-provided URI.
- blr_iterator = PluralityCheckableIterator(
- UriOnlyBlrExpansionIterator(
- self, blr, all_versions=self.all_versions))
- if blr_iterator.is_empty() and not ContainsWildcard(blr.GetUriString()):
- raise CommandException('No such object %s' % blr.GetUriString())
-
- for cur_blr in blr_iterator:
- if self.exclude_patterns:
- tomatch = cur_blr.GetUriString()
- skip = False
- for pattern in self.exclude_patterns:
- if fnmatch.fnmatch(tomatch, pattern):
- skip = True
- break
- if skip:
- continue
- if cur_blr.HasKey():
- # Object listing.
- no, nb = self._PrintInfoAboutBucketListingRef(cur_blr)
- else:
- # Subdir listing.
- if cur_blr.GetUriString().endswith('//'):
- # Expand gs://bucket// into gs://bucket//* so we don't infinite
- # loop. This case happens when user has uploaded an object whose
- # name begins with a /.
- cur_blr = BucketListingRef(self.suri_builder.StorageUri(
- '%s*' % cur_blr.GetUriString()), None, None, cur_blr.headers)
- no, nb = self._RecursePrint(cur_blr)
- num_bytes += nb
- num_objs += no
-
- if blr.HasPrefix() and not self.summary_only:
- self._PrintSummaryLine(num_bytes, blr.GetUriString().encode('utf-8'))
-
- return num_objs, num_bytes
-
- # Command entry point.
def RunCommand(self):
+ """Command entry point for the du command."""
self.line_ending = '\n'
self.all_versions = False
self.produce_total = False
@@ -278,39 +208,68 @@
# Default to listing all gs buckets.
self.args = ['gs://']
- total_objs = 0
total_bytes = 0
got_nomatch_errors = False
- for uri_str in self.args:
- uri = self.suri_builder.StorageUri(uri_str)
+ def _PrintObjectLong(blr):
+ return self._PrintInfoAboutBucketListingRef(blr)
- # Treat this as the ls command for this function.
- self.proj_id_handler.FillInProjectHeaderIfNeeded('ls', uri, self.headers)
+ def _PrintNothing(unused_blr=None):
+ pass
- iter_bytes = 0
- if uri.names_provider():
- # Provider URI: use bucket wildcard to list buckets.
- for uri in self.WildcardIterator('%s://*' % uri.scheme).IterUris():
- exp_objs, exp_bytes = self._RecursePrint(BucketListingRef(uri))
- iter_bytes += exp_bytes
- total_objs += exp_objs
+ def _PrintDirectory(num_bytes, name):
+ if not self.summary_only:
+ self._PrintSummaryLine(num_bytes, name)
+
+ for url_arg in self.args:
+ top_level_storage_url = StorageUrlFromString(url_arg)
+ if top_level_storage_url.IsFileUrl():
+ raise CommandException('Only cloud URLs are supported for %s'
+ % self.command_name)
+ bucket_listing_fields = ['size']
+
+ ls_helper = LsHelper(
+ self.WildcardIterator, self.logger,
+ print_object_func=_PrintObjectLong, print_dir_func=_PrintNothing,
+ print_dir_header_func=_PrintNothing,
+ print_dir_summary_func=_PrintDirectory,
+ print_newline_func=_PrintNothing, all_versions=self.all_versions,
+ should_recurse=True, exclude_patterns=self.exclude_patterns,
+ fields=bucket_listing_fields)
+
+ # ls_helper expands to objects and prefixes, so perform a top-level
+ # expansion first.
+ if top_level_storage_url.IsProvider():
+ # Provider URL: use bucket wildcard to iterate over all buckets.
+ top_level_iter = self.WildcardIterator(
+ '%s://*' % top_level_storage_url.scheme).IterBuckets(
+ bucket_fields=['id'])
+ elif top_level_storage_url.IsBucket():
+ top_level_iter = self.WildcardIterator(
+ '%s://%s' % (top_level_storage_url.scheme,
+ top_level_storage_url.bucket_name)).IterBuckets(
+ bucket_fields=['id'])
else:
- exp_objs, exp_bytes = self._RecursePrint(BucketListingRef(uri))
- if (exp_objs == 0 and ContainsWildcard(uri) and
- not self.exclude_patterns):
+ top_level_iter = [BucketListingObject(top_level_storage_url)]
+
+ for blr in top_level_iter:
+ storage_url = blr.storage_url
+ if storage_url.IsBucket() and self.summary_only:
+ storage_url = StorageUrlFromString(
+ storage_url.CreatePrefixUrl(wildcard_suffix='**'))
+ _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url)
+ if (storage_url.IsObject() and exp_objs == 0 and
+ ContainsWildcard(url_arg) and not self.exclude_patterns):
got_nomatch_errors = True
- iter_bytes += exp_bytes
- total_objs += exp_objs
+ total_bytes += exp_bytes
- total_bytes += iter_bytes
- if self.summary_only:
- self._PrintSummaryLine(iter_bytes, uri_str)
+ if self.summary_only:
+ self._PrintSummaryLine(exp_bytes, blr.url_string.rstrip('/'))
if self.produce_total:
self._PrintSummaryLine(total_bytes, 'total')
if got_nomatch_errors:
- raise CommandException('One or more URIs matched no objects.')
+ raise CommandException('One or more URLs matched no objects.')
return 0
« no previous file with comments | « gslib/commands/defacl.py ('k') | gslib/commands/hash.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698