Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(182)

Side by Side Diff: gslib/commands/du.py

Issue 698893003: Update checked in version of gsutil to version 4.6 (Closed) Base URL: http://dart.googlecode.com/svn/third_party/gsutil/
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « gslib/commands/defacl.py ('k') | gslib/commands/hash.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # -*- coding: utf-8 -*-
1 # Copyright 2013 Google Inc. All Rights Reserved. 2 # Copyright 2013 Google Inc. All Rights Reserved.
2 # 3 #
3 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License. 5 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at 6 # You may obtain a copy of the License at
6 # 7 #
7 # http://www.apache.org/licenses/LICENSE-2.0 8 # http://www.apache.org/licenses/LICENSE-2.0
8 # 9 #
9 # Unless required by applicable law or agreed to in writing, software 10 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS, 11 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and 13 # See the License for the specific language governing permissions and
13 # limitations under the License. 14 # limitations under the License.
15 """Implementation of Unix-like du command for cloud storage providers."""
14 16
15 import fnmatch 17 from __future__ import absolute_import
18
16 import sys 19 import sys
17 20
18 from boto.s3.deletemarker import DeleteMarker 21 from gslib.boto_translation import S3_DELETE_MARKER_GUID
19 from gslib.bucket_listing_ref import BucketListingRef 22 from gslib.bucket_listing_ref import BucketListingObject
20 from gslib.command import Command 23 from gslib.command import Command
21 from gslib.command import COMMAND_NAME 24 from gslib.cs_api_map import ApiSelector
22 from gslib.command import COMMAND_NAME_ALIASES
23 from gslib.command import FILE_URIS_OK
24 from gslib.command import MAX_ARGS
25 from gslib.command import MIN_ARGS
26 from gslib.command import PROVIDER_URIS_OK
27 from gslib.command import SUPPORTED_SUB_ARGS
28 from gslib.command import URIS_START_ARG
29 from gslib.commands.ls import UriOnlyBlrExpansionIterator
30 from gslib.commands.ls import UriStrForObj
31 from gslib.exception import CommandException 25 from gslib.exception import CommandException
32 from gslib.help_provider import HELP_NAME 26 from gslib.ls_helper import LsHelper
33 from gslib.help_provider import HELP_NAME_ALIASES 27 from gslib.storage_url import ContainsWildcard
34 from gslib.help_provider import HELP_ONE_LINE_SUMMARY 28 from gslib.storage_url import StorageUrlFromString
35 from gslib.help_provider import HELP_TEXT
36 from gslib.help_provider import HelpType
37 from gslib.help_provider import HELP_TYPE
38 from gslib.plurality_checkable_iterator import PluralityCheckableIterator
39 from gslib.util import MakeHumanReadable 29 from gslib.util import MakeHumanReadable
40 from gslib.util import NO_MAX 30 from gslib.util import NO_MAX
41 from gslib.wildcard_iterator import ContainsWildcard 31 from gslib.util import UTF8
42 32
43 _detailed_help_text = (""" 33 _DETAILED_HELP_TEXT = ("""
44 <B>SYNOPSIS</B> 34 <B>SYNOPSIS</B>
45 gsutil du uri... 35 gsutil du url...
46 36
47 37
48 <B>DESCRIPTION</B> 38 <B>DESCRIPTION</B>
49 The du command displays the amount of space (in bytes) being used by the 39 The du command displays the amount of space (in bytes) being used by the
50 objects for a given URI. The syntax emulates the Linux du command (which 40 objects in the file or object hierarchy under a given URL. The syntax emulates
51 stands for disk usage). 41 the Linux du command (which stands for disk usage). For example, the command:
42
43 gsutil du -s gs://your-bucket/dir
44
45 will report the total space used by all objects under gs://your-bucket/dir and
46 any sub-directories.
52 47
53 48
54 <B>OPTIONS</B> 49 <B>OPTIONS</B>
55 -0 Ends each output line with a 0 byte rather than a newline. This 50 -0 Ends each output line with a 0 byte rather than a newline. This
56 can be useful to make the output more easily machine-readable. 51 can be useful to make the output more easily machine-readable.
57 52
58 -a Includes non-current object versions / generations in the listing 53 -a Includes non-current object versions / generations in the listing
59 (only useful with a versioning-enabled bucket). Also prints 54 (only useful with a versioning-enabled bucket). Also prints
60 generation and metageneration for each listed object. 55 generation and metageneration for each listed object.
61 56
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
94 To list the size of all objects in a versioned bucket, including objects that 89 To list the size of all objects in a versioned bucket, including objects that
95 are not the latest: 90 are not the latest:
96 91
97 gsutil du -a gs://bucketname 92 gsutil du -a gs://bucketname
98 93
99 To list all objects in a bucket, except objects that end in ".bak", 94 To list all objects in a bucket, except objects that end in ".bak",
100 with each object printed ending in a null byte: 95 with each object printed ending in a null byte:
101 96
102 gsutil du -e "*.bak" -0 gs://bucketname 97 gsutil du -e "*.bak" -0 gs://bucketname
103 98
99 To get a total of all buckets in a project with a grand total for an entire
100 project:
101
102 gsutil -o GSUtil:default_project_id=project-name du -shc
104 """) 103 """)
105 104
105
106 class DuCommand(Command): 106 class DuCommand(Command):
107 """Implementation of gsutil du command.""" 107 """Implementation of gsutil du command."""
108 108
109 # Command specification (processed by parent class). 109 # Command specification. See base class for documentation.
110 command_spec = { 110 command_spec = Command.CreateCommandSpec(
111 # Name of command. 111 'du',
112 COMMAND_NAME : 'du', 112 command_name_aliases=[],
113 # List of command name aliases. 113 min_args=0,
114 COMMAND_NAME_ALIASES : [], 114 max_args=NO_MAX,
115 # Min number of args required by this command. 115 supported_sub_args='0ace:hsX:',
116 MIN_ARGS : 0, 116 file_url_ok=False,
117 # Max number of args accepted by this command, or NO_MAX. 117 provider_url_ok=True,
118 MAX_ARGS : NO_MAX, 118 urls_start_arg=0,
119 # Getopt-style string specifying acceptable sub args. 119 gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
120 SUPPORTED_SUB_ARGS : '0ace:hsX:', 120 gs_default_api=ApiSelector.JSON,
121 # True if file URIs acceptable for this command. 121 )
122 FILE_URIS_OK : False, 122 # Help specification. See help_provider.py for documentation.
123 # True if provider-only URIs acceptable for this command. 123 help_spec = Command.HelpSpec(
124 PROVIDER_URIS_OK : True, 124 help_name='du',
125 # Index in args of first URI arg. 125 help_name_aliases=[],
126 URIS_START_ARG : 0, 126 help_type='command_help',
127 } 127 help_one_line_summary='Display object size usage',
128 help_spec = { 128 help_text=_DETAILED_HELP_TEXT,
129 # Name of command or auxiliary help info for which this help applies. 129 subcommand_help_text={},
130 HELP_NAME : 'du', 130 )
131 # List of help name aliases.
132 HELP_NAME_ALIASES : [],
133 # Type of help:
134 HELP_TYPE : HelpType.COMMAND_HELP,
135 # One line summary of this help.
136 HELP_ONE_LINE_SUMMARY : 'Display object size usage',
137 # The full help text.
138 HELP_TEXT : _detailed_help_text,
139 }
140 131
141 def _PrintSummaryLine(self, num_bytes, name): 132 def _PrintSummaryLine(self, num_bytes, name):
142 size_string = (MakeHumanReadable(num_bytes) 133 size_string = (MakeHumanReadable(num_bytes)
143 if self.human_readable else str(num_bytes)) 134 if self.human_readable else str(num_bytes))
144 sys.stdout.write('%(size)-10s %(name)s%(ending)s' % { 135 sys.stdout.write('%(size)-10s %(name)s%(ending)s' % {
145 'size': size_string, 'name': name, 'ending': self.line_ending}) 136 'size': size_string, 'name': name, 'ending': self.line_ending})
146 137
147 def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref): 138 def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref):
148 """Print listing info for given bucket_listing_ref. 139 """Print listing info for given bucket_listing_ref.
149 140
150 Args: 141 Args:
151 bucket_listing_ref: BucketListing being listed. 142 bucket_listing_ref: BucketListing being listed.
152 143
153 Returns: 144 Returns:
154 Tuple (number of objects, object size) 145 Tuple (number of objects, object size)
155 146
156 Raises: 147 Raises:
157 Exception: if calling bug encountered. 148 Exception: if calling bug encountered.
158 """ 149 """
159 uri = bucket_listing_ref.GetUri() 150 obj = bucket_listing_ref.root_object
160 obj = bucket_listing_ref.GetKey() 151 url_str = bucket_listing_ref.url_string
161 uri_str = UriStrForObj(uri, obj, self.all_versions) 152 if (obj.metadata and S3_DELETE_MARKER_GUID in
162 153 obj.metadata.additionalProperties):
163 if isinstance(obj, DeleteMarker):
164 size_string = '0' 154 size_string = '0'
165 numobjs = 0 155 num_bytes = 0
166 numbytes = 0 156 num_objs = 0
157 url_str += '<DeleteMarker>'
167 else: 158 else:
168 size_string = (MakeHumanReadable(obj.size) 159 size_string = (MakeHumanReadable(obj.size)
169 if self.human_readable else str(obj.size)) 160 if self.human_readable else str(obj.size))
170 numobjs = 1 161 num_bytes = obj.size
171 numbytes = obj.size 162 num_objs = 1
172 163
173 if not self.summary_only: 164 if not self.summary_only:
174 sys.stdout.write('%(size)-10s %(uri)s%(ending)s' % { 165 sys.stdout.write('%(size)-10s %(url)s%(ending)s' % {
175 'size': size_string, 166 'size': size_string,
176 'uri': uri_str.encode('utf-8'), 167 'url': url_str.encode(UTF8),
177 'ending': self.line_ending}) 168 'ending': self.line_ending})
178 169
179 return numobjs, numbytes 170 return (num_objs, num_bytes)
180 171
181 def _RecursePrint(self, blr):
182 """
183 Expands a bucket listing reference and recurses to its children, calling
184 _PrintInfoAboutBucketListingRef for each expanded object found.
185
186 Args:
187 blr: An instance of BucketListingRef.
188
189 Returns:
190 Tuple containing (number of objects, total number of bytes)
191 """
192 num_bytes = 0
193 num_objs = 0
194
195 if blr.HasKey():
196 blr_iterator = iter([blr])
197 elif blr.HasPrefix():
198 blr_iterator = self.WildcardIterator(
199 '%s/*' % blr.GetRStrippedUriString(), all_versions=self.all_versions)
200 elif blr.NamesBucket():
201 blr_iterator = self.WildcardIterator(
202 '%s*' % blr.GetUriString(), all_versions=self.all_versions)
203 else:
204 # This BLR didn't come from a bucket listing. This case happens for
205 # BLRs instantiated from a user-provided URI.
206 blr_iterator = PluralityCheckableIterator(
207 UriOnlyBlrExpansionIterator(
208 self, blr, all_versions=self.all_versions))
209 if blr_iterator.is_empty() and not ContainsWildcard(blr.GetUriString()):
210 raise CommandException('No such object %s' % blr.GetUriString())
211
212 for cur_blr in blr_iterator:
213 if self.exclude_patterns:
214 tomatch = cur_blr.GetUriString()
215 skip = False
216 for pattern in self.exclude_patterns:
217 if fnmatch.fnmatch(tomatch, pattern):
218 skip = True
219 break
220 if skip:
221 continue
222 if cur_blr.HasKey():
223 # Object listing.
224 no, nb = self._PrintInfoAboutBucketListingRef(cur_blr)
225 else:
226 # Subdir listing.
227 if cur_blr.GetUriString().endswith('//'):
228 # Expand gs://bucket// into gs://bucket//* so we don't infinite
229 # loop. This case happens when user has uploaded an object whose
230 # name begins with a /.
231 cur_blr = BucketListingRef(self.suri_builder.StorageUri(
232 '%s*' % cur_blr.GetUriString()), None, None, cur_blr.headers)
233 no, nb = self._RecursePrint(cur_blr)
234 num_bytes += nb
235 num_objs += no
236
237 if blr.HasPrefix() and not self.summary_only:
238 self._PrintSummaryLine(num_bytes, blr.GetUriString().encode('utf-8'))
239
240 return num_objs, num_bytes
241
242 # Command entry point.
243 def RunCommand(self): 172 def RunCommand(self):
173 """Command entry point for the du command."""
244 self.line_ending = '\n' 174 self.line_ending = '\n'
245 self.all_versions = False 175 self.all_versions = False
246 self.produce_total = False 176 self.produce_total = False
247 self.human_readable = False 177 self.human_readable = False
248 self.summary_only = False 178 self.summary_only = False
249 self.exclude_patterns = [] 179 self.exclude_patterns = []
250 if self.sub_opts: 180 if self.sub_opts:
251 for o, a in self.sub_opts: 181 for o, a in self.sub_opts:
252 if o == '-0': 182 if o == '-0':
253 self.line_ending = '\0' 183 self.line_ending = '\0'
(...skipping 17 matching lines...) Expand all
271 line = line.strip() 201 line = line.strip()
272 if line: 202 if line:
273 self.exclude_patterns.append(line) 203 self.exclude_patterns.append(line)
274 finally: 204 finally:
275 f.close() 205 f.close()
276 206
277 if not self.args: 207 if not self.args:
278 # Default to listing all gs buckets. 208 # Default to listing all gs buckets.
279 self.args = ['gs://'] 209 self.args = ['gs://']
280 210
281 total_objs = 0
282 total_bytes = 0 211 total_bytes = 0
283 got_nomatch_errors = False 212 got_nomatch_errors = False
284 213
285 for uri_str in self.args: 214 def _PrintObjectLong(blr):
286 uri = self.suri_builder.StorageUri(uri_str) 215 return self._PrintInfoAboutBucketListingRef(blr)
287 216
288 # Treat this as the ls command for this function. 217 def _PrintNothing(unused_blr=None):
289 self.proj_id_handler.FillInProjectHeaderIfNeeded('ls', uri, self.headers) 218 pass
290 219
291 iter_bytes = 0 220 def _PrintDirectory(num_bytes, name):
292 if uri.names_provider(): 221 if not self.summary_only:
293 # Provider URI: use bucket wildcard to list buckets. 222 self._PrintSummaryLine(num_bytes, name)
294 for uri in self.WildcardIterator('%s://*' % uri.scheme).IterUris(): 223
295 exp_objs, exp_bytes = self._RecursePrint(BucketListingRef(uri)) 224 for url_arg in self.args:
296 iter_bytes += exp_bytes 225 top_level_storage_url = StorageUrlFromString(url_arg)
297 total_objs += exp_objs 226 if top_level_storage_url.IsFileUrl():
227 raise CommandException('Only cloud URLs are supported for %s'
228 % self.command_name)
229 bucket_listing_fields = ['size']
230
231 ls_helper = LsHelper(
232 self.WildcardIterator, self.logger,
233 print_object_func=_PrintObjectLong, print_dir_func=_PrintNothing,
234 print_dir_header_func=_PrintNothing,
235 print_dir_summary_func=_PrintDirectory,
236 print_newline_func=_PrintNothing, all_versions=self.all_versions,
237 should_recurse=True, exclude_patterns=self.exclude_patterns,
238 fields=bucket_listing_fields)
239
240 # ls_helper expands to objects and prefixes, so perform a top-level
241 # expansion first.
242 if top_level_storage_url.IsProvider():
243 # Provider URL: use bucket wildcard to iterate over all buckets.
244 top_level_iter = self.WildcardIterator(
245 '%s://*' % top_level_storage_url.scheme).IterBuckets(
246 bucket_fields=['id'])
247 elif top_level_storage_url.IsBucket():
248 top_level_iter = self.WildcardIterator(
249 '%s://%s' % (top_level_storage_url.scheme,
250 top_level_storage_url.bucket_name)).IterBuckets(
251 bucket_fields=['id'])
298 else: 252 else:
299 exp_objs, exp_bytes = self._RecursePrint(BucketListingRef(uri)) 253 top_level_iter = [BucketListingObject(top_level_storage_url)]
300 if (exp_objs == 0 and ContainsWildcard(uri) and 254
301 not self.exclude_patterns): 255 for blr in top_level_iter:
256 storage_url = blr.storage_url
257 if storage_url.IsBucket() and self.summary_only:
258 storage_url = StorageUrlFromString(
259 storage_url.CreatePrefixUrl(wildcard_suffix='**'))
260 _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url)
261 if (storage_url.IsObject() and exp_objs == 0 and
262 ContainsWildcard(url_arg) and not self.exclude_patterns):
302 got_nomatch_errors = True 263 got_nomatch_errors = True
303 iter_bytes += exp_bytes 264 total_bytes += exp_bytes
304 total_objs += exp_objs
305 265
306 total_bytes += iter_bytes 266 if self.summary_only:
307 if self.summary_only: 267 self._PrintSummaryLine(exp_bytes, blr.url_string.rstrip('/'))
308 self._PrintSummaryLine(iter_bytes, uri_str)
309 268
310 if self.produce_total: 269 if self.produce_total:
311 self._PrintSummaryLine(total_bytes, 'total') 270 self._PrintSummaryLine(total_bytes, 'total')
312 271
313 if got_nomatch_errors: 272 if got_nomatch_errors:
314 raise CommandException('One or more URIs matched no objects.') 273 raise CommandException('One or more URLs matched no objects.')
315 274
316 return 0 275 return 0
OLDNEW
« no previous file with comments | « gslib/commands/defacl.py ('k') | gslib/commands/hash.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698