| OLD | NEW |
| (Empty) |
| 1 # -*- coding: utf-8 -*- | |
| 2 # Copyright 2013 Google Inc. All Rights Reserved. | |
| 3 # | |
| 4 # Licensed under the Apache License, Version 2.0 (the "License"); | |
| 5 # you may not use this file except in compliance with the License. | |
| 6 # You may obtain a copy of the License at | |
| 7 # | |
| 8 # http://www.apache.org/licenses/LICENSE-2.0 | |
| 9 # | |
| 10 # Unless required by applicable law or agreed to in writing, software | |
| 11 # distributed under the License is distributed on an "AS IS" BASIS, | |
| 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 13 # See the License for the specific language governing permissions and | |
| 14 # limitations under the License. | |
| 15 """Implementation of Unix-like du command for cloud storage providers.""" | |
| 16 | |
| 17 from __future__ import absolute_import | |
| 18 | |
| 19 import sys | |
| 20 | |
| 21 from gslib.boto_translation import S3_DELETE_MARKER_GUID | |
| 22 from gslib.bucket_listing_ref import BucketListingObject | |
| 23 from gslib.command import Command | |
| 24 from gslib.command_argument import CommandArgument | |
| 25 from gslib.cs_api_map import ApiSelector | |
| 26 from gslib.exception import CommandException | |
| 27 from gslib.ls_helper import LsHelper | |
| 28 from gslib.storage_url import ContainsWildcard | |
| 29 from gslib.storage_url import StorageUrlFromString | |
| 30 from gslib.util import MakeHumanReadable | |
| 31 from gslib.util import NO_MAX | |
| 32 from gslib.util import UTF8 | |
| 33 | |
# One-line usage synopsis. Reused as usage_synopsis in the command spec and
# spliced into the SYNOPSIS section of the detailed help text below.
_SYNOPSIS = """
  gsutil du url...
"""

# Full help text for the du command; passed as help_text to Command.HelpSpec.
# The <B>...</B> markers delimit section headings (presumably rendered by the
# help provider -- confirm against help_provider.py).
_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """


<B>DESCRIPTION</B>
  The du command displays the amount of space (in bytes) being used by the
  objects in the file or object hierarchy under a given URL. The syntax emulates
  the Linux du command (which stands for disk usage). For example, the command:

    gsutil du -s gs://your-bucket/dir

  will report the total space used by all objects under gs://your-bucket/dir and
  any sub-directories.


<B>OPTIONS</B>
  -0          Ends each output line with a 0 byte rather than a newline. This
              can be useful to make the output more easily machine-readable.

  -a          Includes non-current object versions / generations in the listing
              (only useful with a versioning-enabled bucket). Also prints
              generation and metageneration for each listed object.

  -c          Produce a grand total.

  -e          A pattern to exclude from reporting. Example: -e "*.o" would
              exclude any object that ends in ".o". Can be specified multiple
              times.

  -h          Prints object sizes in human-readable format (e.g., 1 KiB,
              234 MiB, 2GiB, etc.)

  -s          Display only a summary total for each argument.

  -X          Similar to -e, but excludes patterns from the given file. The
              patterns to exclude should be one per line.


<B>EXAMPLES</B>
  To list the size of all objects in a bucket:

    gsutil du gs://bucketname

  To list the size of all objects underneath a prefix:

    gsutil du gs://bucketname/prefix/*

  To print the total number of bytes in a bucket, in human-readable form:

    gsutil du -ch gs://bucketname

  To see a summary of the total bytes in the two given buckets:

    gsutil du -s gs://bucket1 gs://bucket2

  To list the size of all objects in a versioned bucket, including objects that
  are not the latest:

    gsutil du -a gs://bucketname

  To list all objects in a bucket, except objects that end in ".bak",
  with each object printed ending in a null byte:

    gsutil du -e "*.bak" -0 gs://bucketname

  To get a total of all buckets in a project with a grand total for an entire
  project:

      gsutil -o GSUtil:default_project_id=project-name du -shc
""")
| 109 | |
| 110 | |
class DuCommand(Command):
  """Implementation of gsutil du command.

  Walks the objects under each cloud URL argument via LsHelper, printing one
  size line per object and per directory (unless -s is given), an optional
  per-argument summary (-s) and an optional grand total (-c).
  """

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'du',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='0ace:hsX:',
      file_url_ok=False,
      provider_url_ok=True,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='du',
      help_name_aliases=[],
      help_type='command_help',
      help_one_line_summary='Display object size usage',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def _PrintSummaryLine(self, num_bytes, name):
    """Writes one '<size> <name>' line to stdout.

    Args:
      num_bytes: Size to report; rendered human-readable when -h was given.
      name: Label printed after the size. It is written as-is, so callers
          are expected to pass text that is safe to write to stdout
          (NOTE(review): presumably already UTF-8 encoded when it comes
          from LsHelper -- confirm against ls_helper.py).
    """
    size_string = (MakeHumanReadable(num_bytes)
                   if self.human_readable else str(num_bytes))
    sys.stdout.write('%(size)-10s  %(name)s%(ending)s' % {
        'size': size_string, 'name': name, 'ending': self.line_ending})

  def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref):
    """Print listing info for given bucket_listing_ref.

    Args:
      bucket_listing_ref: BucketListing being listed.

    Returns:
      Tuple (number of objects, object size). An S3 delete marker counts as
      zero objects and zero bytes.
    """
    obj = bucket_listing_ref.root_object
    url_str = bucket_listing_ref.url_string
    if (obj.metadata and S3_DELETE_MARKER_GUID in
        obj.metadata.additionalProperties):
      # Delete markers occupy no space; tag the URL so the line is
      # recognizable in the output.
      size_string = '0'
      num_bytes = 0
      num_objs = 0
      url_str += '<DeleteMarker>'
    else:
      size_string = (MakeHumanReadable(obj.size)
                     if self.human_readable else str(obj.size))
      num_bytes = obj.size
      num_objs = 1

    if not self.summary_only:
      sys.stdout.write('%(size)-10s  %(url)s%(ending)s' % {
          'size': size_string,
          'url': url_str.encode(UTF8),
          'ending': self.line_ending})

    return (num_objs, num_bytes)

  def _AddExcludePatterns(self, lines):
    """Appends every non-blank, stripped line to self.exclude_patterns."""
    for line in lines:
      line = line.strip()
      if line:
        self.exclude_patterns.append(line)

  def _ParseSubOpts(self):
    """Resets option state to defaults, then applies self.sub_opts."""
    self.line_ending = '\n'
    self.all_versions = False
    self.produce_total = False
    self.human_readable = False
    self.summary_only = False
    self.exclude_patterns = []
    if not self.sub_opts:
      return
    for o, a in self.sub_opts:
      if o == '-0':
        self.line_ending = '\0'
      elif o == '-a':
        self.all_versions = True
      elif o == '-c':
        self.produce_total = True
      elif o == '-e':
        self.exclude_patterns.append(a)
      elif o == '-h':
        self.human_readable = True
      elif o == '-s':
        self.summary_only = True
      elif o == '-X':
        if a == '-':
          # stdin belongs to the process, not to this command: read the
          # patterns from it but leave it open.
          self._AddExcludePatterns(sys.stdin)
        else:
          # Only a file opened here is closed here.
          with open(a, 'r') as pattern_file:
            self._AddExcludePatterns(pattern_file)

  def RunCommand(self):
    """Command entry point for the du command.

    Returns:
      0 on success.

    Raises:
      CommandException: if an argument is a file URL, or if a wildcarded
          object URL matched no objects while no exclude patterns were set.
    """
    self._ParseSubOpts()

    if not self.args:
      # Default to listing all gs buckets.
      self.args = ['gs://']

    total_bytes = 0
    got_nomatch_errors = False

    def _PrintObjectLong(blr):
      return self._PrintInfoAboutBucketListingRef(blr)

    def _PrintNothing(unused_blr=None):
      pass

    def _PrintDirectory(num_bytes, name):
      if not self.summary_only:
        self._PrintSummaryLine(num_bytes, name)

    for url_arg in self.args:
      top_level_storage_url = StorageUrlFromString(url_arg)
      if top_level_storage_url.IsFileUrl():
        raise CommandException('Only cloud URLs are supported for %s'
                               % self.command_name)
      bucket_listing_fields = ['size']

      ls_helper = LsHelper(
          self.WildcardIterator, self.logger,
          print_object_func=_PrintObjectLong, print_dir_func=_PrintNothing,
          print_dir_header_func=_PrintNothing,
          print_dir_summary_func=_PrintDirectory,
          print_newline_func=_PrintNothing, all_versions=self.all_versions,
          should_recurse=True, exclude_patterns=self.exclude_patterns,
          fields=bucket_listing_fields)

      # ls_helper expands to objects and prefixes, so perform a top-level
      # expansion first.
      if top_level_storage_url.IsProvider():
        # Provider URL: use bucket wildcard to iterate over all buckets.
        top_level_iter = self.WildcardIterator(
            '%s://*' % top_level_storage_url.scheme).IterBuckets(
                bucket_fields=['id'])
      elif top_level_storage_url.IsBucket():
        top_level_iter = self.WildcardIterator(
            '%s://%s' % (top_level_storage_url.scheme,
                         top_level_storage_url.bucket_name)).IterBuckets(
                             bucket_fields=['id'])
      else:
        top_level_iter = [BucketListingObject(top_level_storage_url)]

      for blr in top_level_iter:
        storage_url = blr.storage_url
        if storage_url.IsBucket() and self.summary_only:
          # In summary mode a bucket is expanded through a '**' prefix URL
          # instead of the bare bucket URL.
          storage_url = StorageUrlFromString(
              storage_url.CreatePrefixUrl(wildcard_suffix='**'))
        _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url)
        if (storage_url.IsObject() and exp_objs == 0 and
            ContainsWildcard(url_arg) and not self.exclude_patterns):
          got_nomatch_errors = True
        total_bytes += exp_bytes

        if self.summary_only:
          # Encode the URL the same way the per-object lines do, so a
          # non-ASCII name is written to stdout consistently.
          self._PrintSummaryLine(
              exp_bytes, blr.url_string.rstrip('/').encode(UTF8))

    if self.produce_total:
      self._PrintSummaryLine(total_bytes, 'total')

    if got_nomatch_errors:
      raise CommandException('One or more URLs matched no objects.')

    return 0
| OLD | NEW |