| OLD | NEW |
| 1 # -*- coding: utf-8 -*- |
| 1 # Copyright 2013 Google Inc. All Rights Reserved. | 2 # Copyright 2013 Google Inc. All Rights Reserved. |
| 2 # | 3 # |
| 3 # Licensed under the Apache License, Version 2.0 (the "License"); | 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 # you may not use this file except in compliance with the License. | 5 # you may not use this file except in compliance with the License. |
| 5 # You may obtain a copy of the License at | 6 # You may obtain a copy of the License at |
| 6 # | 7 # |
| 7 # http://www.apache.org/licenses/LICENSE-2.0 | 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 # | 9 # |
| 9 # Unless required by applicable law or agreed to in writing, software | 10 # Unless required by applicable law or agreed to in writing, software |
| 10 # distributed under the License is distributed on an "AS IS" BASIS, | 11 # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 # See the License for the specific language governing permissions and | 13 # See the License for the specific language governing permissions and |
| 13 # limitations under the License. | 14 # limitations under the License. |
| 15 """Implementation of Unix-like du command for cloud storage providers.""" |
| 14 | 16 |
| 15 import fnmatch | 17 from __future__ import absolute_import |
| 18 |
| 16 import sys | 19 import sys |
| 17 | 20 |
| 18 from boto.s3.deletemarker import DeleteMarker | 21 from gslib.boto_translation import S3_DELETE_MARKER_GUID |
| 19 from gslib.bucket_listing_ref import BucketListingRef | 22 from gslib.bucket_listing_ref import BucketListingObject |
| 20 from gslib.command import Command | 23 from gslib.command import Command |
| 21 from gslib.command import COMMAND_NAME | 24 from gslib.cs_api_map import ApiSelector |
| 22 from gslib.command import COMMAND_NAME_ALIASES | |
| 23 from gslib.command import FILE_URIS_OK | |
| 24 from gslib.command import MAX_ARGS | |
| 25 from gslib.command import MIN_ARGS | |
| 26 from gslib.command import PROVIDER_URIS_OK | |
| 27 from gslib.command import SUPPORTED_SUB_ARGS | |
| 28 from gslib.command import URIS_START_ARG | |
| 29 from gslib.commands.ls import UriOnlyBlrExpansionIterator | |
| 30 from gslib.commands.ls import UriStrForObj | |
| 31 from gslib.exception import CommandException | 25 from gslib.exception import CommandException |
| 32 from gslib.help_provider import HELP_NAME | 26 from gslib.ls_helper import LsHelper |
| 33 from gslib.help_provider import HELP_NAME_ALIASES | 27 from gslib.storage_url import ContainsWildcard |
| 34 from gslib.help_provider import HELP_ONE_LINE_SUMMARY | 28 from gslib.storage_url import StorageUrlFromString |
| 35 from gslib.help_provider import HELP_TEXT | |
| 36 from gslib.help_provider import HelpType | |
| 37 from gslib.help_provider import HELP_TYPE | |
| 38 from gslib.plurality_checkable_iterator import PluralityCheckableIterator | |
| 39 from gslib.util import MakeHumanReadable | 29 from gslib.util import MakeHumanReadable |
| 40 from gslib.util import NO_MAX | 30 from gslib.util import NO_MAX |
| 41 from gslib.wildcard_iterator import ContainsWildcard | 31 from gslib.util import UTF8 |
| 42 | 32 |
| 43 _detailed_help_text = (""" | 33 _DETAILED_HELP_TEXT = (""" |
| 44 <B>SYNOPSIS</B> | 34 <B>SYNOPSIS</B> |
| 45 gsutil du uri... | 35 gsutil du url... |
| 46 | 36 |
| 47 | 37 |
| 48 <B>DESCRIPTION</B> | 38 <B>DESCRIPTION</B> |
| 49 The du command displays the amount of space (in bytes) being used by the | 39 The du command displays the amount of space (in bytes) being used by the |
| 50 objects for a given URI. The syntax emulates the Linux du command (which | 40 objects in the file or object hierarchy under a given URL. The syntax emulates |
| 51 stands for disk usage). | 41 the Linux du command (which stands for disk usage). For example, the command: |
| 42 |
| 43 gsutil du -s gs://your-bucket/dir |
| 44 |
| 45 will report the total space used by all objects under gs://your-bucket/dir and |
| 46 any sub-directories. |
| 52 | 47 |
| 53 | 48 |
| 54 <B>OPTIONS</B> | 49 <B>OPTIONS</B> |
| 55 -0 Ends each output line with a 0 byte rather than a newline. This | 50 -0 Ends each output line with a 0 byte rather than a newline. This |
| 56 can be useful to make the output more easily machine-readable. | 51 can be useful to make the output more easily machine-readable. |
| 57 | 52 |
| 58 -a Includes non-current object versions / generations in the listing | 53 -a Includes non-current object versions / generations in the listing |
| 59 (only useful with a versioning-enabled bucket). Also prints | 54 (only useful with a versioning-enabled bucket). Also prints |
| 60 generation and metageneration for each listed object. | 55 generation and metageneration for each listed object. |
| 61 | 56 |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 94 To list the size of all objects in a versioned bucket, including objects that | 89 To list the size of all objects in a versioned bucket, including objects that |
| 95 are not the latest: | 90 are not the latest: |
| 96 | 91 |
| 97 gsutil du -a gs://bucketname | 92 gsutil du -a gs://bucketname |
| 98 | 93 |
| 99 To list all objects in a bucket, except objects that end in ".bak", | 94 To list all objects in a bucket, except objects that end in ".bak", |
| 100 with each object printed ending in a null byte: | 95 with each object printed ending in a null byte: |
| 101 | 96 |
| 102 gsutil du -e "*.bak" -0 gs://bucketname | 97 gsutil du -e "*.bak" -0 gs://bucketname |
| 103 | 98 |
| 99 To get a total of all buckets in a project with a grand total for an entire |
| 100 project: |
| 101 |
| 102 gsutil -o GSUtil:default_project_id=project-name du -shc |
| 104 """) | 103 """) |
| 105 | 104 |
| 105 |
| 106 class DuCommand(Command): | 106 class DuCommand(Command): |
| 107 """Implementation of gsutil du command.""" | 107 """Implementation of gsutil du command.""" |
| 108 | 108 |
| 109 # Command specification (processed by parent class). | 109 # Command specification. See base class for documentation. |
| 110 command_spec = { | 110 command_spec = Command.CreateCommandSpec( |
| 111 # Name of command. | 111 'du', |
| 112 COMMAND_NAME : 'du', | 112 command_name_aliases=[], |
| 113 # List of command name aliases. | 113 min_args=0, |
| 114 COMMAND_NAME_ALIASES : [], | 114 max_args=NO_MAX, |
| 115 # Min number of args required by this command. | 115 supported_sub_args='0ace:hsX:', |
| 116 MIN_ARGS : 0, | 116 file_url_ok=False, |
| 117 # Max number of args required by this command, or NO_MAX. | 117 provider_url_ok=True, |
| 118 MAX_ARGS : NO_MAX, | 118 urls_start_arg=0, |
| 119 # Getopt-style string specifying acceptable sub args. | 119 gs_api_support=[ApiSelector.XML, ApiSelector.JSON], |
| 120 SUPPORTED_SUB_ARGS : '0ace:hsX:', | 120 gs_default_api=ApiSelector.JSON, |
| 121 # True if file URIs acceptable for this command. | 121 ) |
| 122 FILE_URIS_OK : False, | 122 # Help specification. See help_provider.py for documentation. |
| 123 # True if provider-only URIs acceptable for this command. | 123 help_spec = Command.HelpSpec( |
| 124 PROVIDER_URIS_OK : True, | 124 help_name='du', |
| 125 # Index in args of first URI arg. | 125 help_name_aliases=[], |
| 126 URIS_START_ARG : 0, | 126 help_type='command_help', |
| 127 } | 127 help_one_line_summary='Display object size usage', |
| 128 help_spec = { | 128 help_text=_DETAILED_HELP_TEXT, |
| 129 # Name of command or auxiliary help info for which this help applies. | 129 subcommand_help_text={}, |
| 130 HELP_NAME : 'du', | 130 ) |
| 131 # List of help name aliases. | |
| 132 HELP_NAME_ALIASES : [], | |
| 133 # Type of help: | |
| 134 HELP_TYPE : HelpType.COMMAND_HELP, | |
| 135 # One line summary of this help. | |
| 136 HELP_ONE_LINE_SUMMARY : 'Display object size usage', | |
| 137 # The full help text. | |
| 138 HELP_TEXT : _detailed_help_text, | |
| 139 } | |
| 140 | 131 |
| 141 def _PrintSummaryLine(self, num_bytes, name): | 132 def _PrintSummaryLine(self, num_bytes, name): |
| 142 size_string = (MakeHumanReadable(num_bytes) | 133 size_string = (MakeHumanReadable(num_bytes) |
| 143 if self.human_readable else str(num_bytes)) | 134 if self.human_readable else str(num_bytes)) |
| 144 sys.stdout.write('%(size)-10s %(name)s%(ending)s' % { | 135 sys.stdout.write('%(size)-10s %(name)s%(ending)s' % { |
| 145 'size': size_string, 'name': name, 'ending': self.line_ending}) | 136 'size': size_string, 'name': name, 'ending': self.line_ending}) |
| 146 | 137 |
| 147 def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref): | 138 def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref): |
| 148 """Print listing info for given bucket_listing_ref. | 139 """Print listing info for given bucket_listing_ref. |
| 149 | 140 |
| 150 Args: | 141 Args: |
| 151 bucket_listing_ref: BucketListing being listed. | 142 bucket_listing_ref: BucketListing being listed. |
| 152 | 143 |
| 153 Returns: | 144 Returns: |
| 154 Tuple (number of objects, object size) | 145 Tuple (number of objects, object size) |
| 155 | 146 |
| 156 Raises: | 147 Raises: |
| 157 Exception: if calling bug encountered. | 148 Exception: if calling bug encountered. |
| 158 """ | 149 """ |
| 159 uri = bucket_listing_ref.GetUri() | 150 obj = bucket_listing_ref.root_object |
| 160 obj = bucket_listing_ref.GetKey() | 151 url_str = bucket_listing_ref.url_string |
| 161 uri_str = UriStrForObj(uri, obj, self.all_versions) | 152 if (obj.metadata and S3_DELETE_MARKER_GUID in |
| 162 | 153 obj.metadata.additionalProperties): |
| 163 if isinstance(obj, DeleteMarker): | |
| 164 size_string = '0' | 154 size_string = '0' |
| 165 numobjs = 0 | 155 num_bytes = 0 |
| 166 numbytes = 0 | 156 num_objs = 0 |
| 157 url_str += '<DeleteMarker>' |
| 167 else: | 158 else: |
| 168 size_string = (MakeHumanReadable(obj.size) | 159 size_string = (MakeHumanReadable(obj.size) |
| 169 if self.human_readable else str(obj.size)) | 160 if self.human_readable else str(obj.size)) |
| 170 numobjs = 1 | 161 num_bytes = obj.size |
| 171 numbytes = obj.size | 162 num_objs = 1 |
| 172 | 163 |
| 173 if not self.summary_only: | 164 if not self.summary_only: |
| 174 sys.stdout.write('%(size)-10s %(uri)s%(ending)s' % { | 165 sys.stdout.write('%(size)-10s %(url)s%(ending)s' % { |
| 175 'size': size_string, | 166 'size': size_string, |
| 176 'uri': uri_str.encode('utf-8'), | 167 'url': url_str.encode(UTF8), |
| 177 'ending': self.line_ending}) | 168 'ending': self.line_ending}) |
| 178 | 169 |
| 179 return numobjs, numbytes | 170 return (num_objs, num_bytes) |
| 180 | 171 |
| 181 def _RecursePrint(self, blr): | |
| 182 """ | |
| 183 Expands a bucket listing reference and recurses to its children, calling | |
| 184 _PrintInfoAboutBucketListingRef for each expanded object found. | |
| 185 | |
| 186 Args: | |
| 187 blr: An instance of BucketListingRef. | |
| 188 | |
| 189 Returns: | |
| 190 Tuple containing (number of object, total number of bytes) | |
| 191 """ | |
| 192 num_bytes = 0 | |
| 193 num_objs = 0 | |
| 194 | |
| 195 if blr.HasKey(): | |
| 196 blr_iterator = iter([blr]) | |
| 197 elif blr.HasPrefix(): | |
| 198 blr_iterator = self.WildcardIterator( | |
| 199 '%s/*' % blr.GetRStrippedUriString(), all_versions=self.all_versions) | |
| 200 elif blr.NamesBucket(): | |
| 201 blr_iterator = self.WildcardIterator( | |
| 202 '%s*' % blr.GetUriString(), all_versions=self.all_versions) | |
| 203 else: | |
| 204 # This BLR didn't come from a bucket listing. This case happens for | |
| 205 # BLR's instantiated from a user-provided URI. | |
| 206 blr_iterator = PluralityCheckableIterator( | |
| 207 UriOnlyBlrExpansionIterator( | |
| 208 self, blr, all_versions=self.all_versions)) | |
| 209 if blr_iterator.is_empty() and not ContainsWildcard(blr.GetUriString()): | |
| 210 raise CommandException('No such object %s' % blr.GetUriString()) | |
| 211 | |
| 212 for cur_blr in blr_iterator: | |
| 213 if self.exclude_patterns: | |
| 214 tomatch = cur_blr.GetUriString() | |
| 215 skip = False | |
| 216 for pattern in self.exclude_patterns: | |
| 217 if fnmatch.fnmatch(tomatch, pattern): | |
| 218 skip = True | |
| 219 break | |
| 220 if skip: | |
| 221 continue | |
| 222 if cur_blr.HasKey(): | |
| 223 # Object listing. | |
| 224 no, nb = self._PrintInfoAboutBucketListingRef(cur_blr) | |
| 225 else: | |
| 226 # Subdir listing. | |
| 227 if cur_blr.GetUriString().endswith('//'): | |
| 228 # Expand gs://bucket// into gs://bucket//* so we don't infinite | |
| 229 # loop. This case happens when user has uploaded an object whose | |
| 230 # name begins with a /. | |
| 231 cur_blr = BucketListingRef(self.suri_builder.StorageUri( | |
| 232 '%s*' % cur_blr.GetUriString()), None, None, cur_blr.headers) | |
| 233 no, nb = self._RecursePrint(cur_blr) | |
| 234 num_bytes += nb | |
| 235 num_objs += no | |
| 236 | |
| 237 if blr.HasPrefix() and not self.summary_only: | |
| 238 self._PrintSummaryLine(num_bytes, blr.GetUriString().encode('utf-8')) | |
| 239 | |
| 240 return num_objs, num_bytes | |
| 241 | |
| 242 # Command entry point. | |
| 243 def RunCommand(self): | 172 def RunCommand(self): |
| 173 """Command entry point for the du command.""" |
| 244 self.line_ending = '\n' | 174 self.line_ending = '\n' |
| 245 self.all_versions = False | 175 self.all_versions = False |
| 246 self.produce_total = False | 176 self.produce_total = False |
| 247 self.human_readable = False | 177 self.human_readable = False |
| 248 self.summary_only = False | 178 self.summary_only = False |
| 249 self.exclude_patterns = [] | 179 self.exclude_patterns = [] |
| 250 if self.sub_opts: | 180 if self.sub_opts: |
| 251 for o, a in self.sub_opts: | 181 for o, a in self.sub_opts: |
| 252 if o == '-0': | 182 if o == '-0': |
| 253 self.line_ending = '\0' | 183 self.line_ending = '\0' |
| (...skipping 17 matching lines...) Expand all Loading... |
| 271 line = line.strip() | 201 line = line.strip() |
| 272 if line: | 202 if line: |
| 273 self.exclude_patterns.append(line) | 203 self.exclude_patterns.append(line) |
| 274 finally: | 204 finally: |
| 275 f.close() | 205 f.close() |
| 276 | 206 |
| 277 if not self.args: | 207 if not self.args: |
| 278 # Default to listing all gs buckets. | 208 # Default to listing all gs buckets. |
| 279 self.args = ['gs://'] | 209 self.args = ['gs://'] |
| 280 | 210 |
| 281 total_objs = 0 | |
| 282 total_bytes = 0 | 211 total_bytes = 0 |
| 283 got_nomatch_errors = False | 212 got_nomatch_errors = False |
| 284 | 213 |
| 285 for uri_str in self.args: | 214 def _PrintObjectLong(blr): |
| 286 uri = self.suri_builder.StorageUri(uri_str) | 215 return self._PrintInfoAboutBucketListingRef(blr) |
| 287 | 216 |
| 288 # Treat this as the ls command for this function. | 217 def _PrintNothing(unused_blr=None): |
| 289 self.proj_id_handler.FillInProjectHeaderIfNeeded('ls', uri, self.headers) | 218 pass |
| 290 | 219 |
| 291 iter_bytes = 0 | 220 def _PrintDirectory(num_bytes, name): |
| 292 if uri.names_provider(): | 221 if not self.summary_only: |
| 293 # Provider URI: use bucket wildcard to list buckets. | 222 self._PrintSummaryLine(num_bytes, name) |
| 294 for uri in self.WildcardIterator('%s://*' % uri.scheme).IterUris(): | 223 |
| 295 exp_objs, exp_bytes = self._RecursePrint(BucketListingRef(uri)) | 224 for url_arg in self.args: |
| 296 iter_bytes += exp_bytes | 225 top_level_storage_url = StorageUrlFromString(url_arg) |
| 297 total_objs += exp_objs | 226 if top_level_storage_url.IsFileUrl(): |
| 227 raise CommandException('Only cloud URLs are supported for %s' |
| 228 % self.command_name) |
| 229 bucket_listing_fields = ['size'] |
| 230 |
| 231 ls_helper = LsHelper( |
| 232 self.WildcardIterator, self.logger, |
| 233 print_object_func=_PrintObjectLong, print_dir_func=_PrintNothing, |
| 234 print_dir_header_func=_PrintNothing, |
| 235 print_dir_summary_func=_PrintDirectory, |
| 236 print_newline_func=_PrintNothing, all_versions=self.all_versions, |
| 237 should_recurse=True, exclude_patterns=self.exclude_patterns, |
| 238 fields=bucket_listing_fields) |
| 239 |
| 240 # ls_helper expands to objects and prefixes, so perform a top-level |
| 241 # expansion first. |
| 242 if top_level_storage_url.IsProvider(): |
| 243 # Provider URL: use bucket wildcard to iterate over all buckets. |
| 244 top_level_iter = self.WildcardIterator( |
| 245 '%s://*' % top_level_storage_url.scheme).IterBuckets( |
| 246 bucket_fields=['id']) |
| 247 elif top_level_storage_url.IsBucket(): |
| 248 top_level_iter = self.WildcardIterator( |
| 249 '%s://%s' % (top_level_storage_url.scheme, |
| 250 top_level_storage_url.bucket_name)).IterBuckets( |
| 251 bucket_fields=['id']) |
| 298 else: | 252 else: |
| 299 exp_objs, exp_bytes = self._RecursePrint(BucketListingRef(uri)) | 253 top_level_iter = [BucketListingObject(top_level_storage_url)] |
| 300 if (exp_objs == 0 and ContainsWildcard(uri) and | 254 |
| 301 not self.exclude_patterns): | 255 for blr in top_level_iter: |
| 256 storage_url = blr.storage_url |
| 257 if storage_url.IsBucket() and self.summary_only: |
| 258 storage_url = StorageUrlFromString( |
| 259 storage_url.CreatePrefixUrl(wildcard_suffix='**')) |
| 260 _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url) |
| 261 if (storage_url.IsObject() and exp_objs == 0 and |
| 262 ContainsWildcard(url_arg) and not self.exclude_patterns): |
| 302 got_nomatch_errors = True | 263 got_nomatch_errors = True |
| 303 iter_bytes += exp_bytes | 264 total_bytes += exp_bytes |
| 304 total_objs += exp_objs | |
| 305 | 265 |
| 306 total_bytes += iter_bytes | 266 if self.summary_only: |
| 307 if self.summary_only: | 267 self._PrintSummaryLine(exp_bytes, blr.url_string.rstrip('/')) |
| 308 self._PrintSummaryLine(iter_bytes, uri_str) | |
| 309 | 268 |
| 310 if self.produce_total: | 269 if self.produce_total: |
| 311 self._PrintSummaryLine(total_bytes, 'total') | 270 self._PrintSummaryLine(total_bytes, 'total') |
| 312 | 271 |
| 313 if got_nomatch_errors: | 272 if got_nomatch_errors: |
| 314 raise CommandException('One or more URIs matched no objects.') | 273 raise CommandException('One or more URLs matched no objects.') |
| 315 | 274 |
| 316 return 0 | 275 return 0 |
| OLD | NEW |