Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(219)

Side by Side Diff: third_party/gsutil/gslib/commands/du.py

Issue 1377933002: [catapult] - Copy Telemetry's gsutilz over to third_party. (Closed) Base URL: https://github.com/catapult-project/catapult.git@master
Patch Set: Rename to gsutil. Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/gsutil/gslib/commands/defacl.py ('k') | third_party/gsutil/gslib/commands/hash.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # -*- coding: utf-8 -*-
2 # Copyright 2013 Google Inc. All Rights Reserved.
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 """Implementation of Unix-like du command for cloud storage providers."""
16
17 from __future__ import absolute_import
18
19 import sys
20
21 from gslib.boto_translation import S3_DELETE_MARKER_GUID
22 from gslib.bucket_listing_ref import BucketListingObject
23 from gslib.command import Command
24 from gslib.command_argument import CommandArgument
25 from gslib.cs_api_map import ApiSelector
26 from gslib.exception import CommandException
27 from gslib.ls_helper import LsHelper
28 from gslib.storage_url import ContainsWildcard
29 from gslib.storage_url import StorageUrlFromString
30 from gslib.util import MakeHumanReadable
31 from gslib.util import NO_MAX
32 from gslib.util import UTF8
33
34 _SYNOPSIS = """
35 gsutil du url...
36 """
37
38 _DETAILED_HELP_TEXT = ("""
39 <B>SYNOPSIS</B>
40 """ + _SYNOPSIS + """
41
42
43 <B>DESCRIPTION</B>
44 The du command displays the amount of space (in bytes) being used by the
45 objects in the file or object hierarchy under a given URL. The syntax emulates
46 the Linux du command (which stands for disk usage). For example, the command:
47
48 gsutil du -s gs://your-bucket/dir
49
50 will report the total space used by all objects under gs://your-bucket/dir and
51 any sub-directories.
52
53
54 <B>OPTIONS</B>
55 -0 Ends each output line with a 0 byte rather than a newline. This
56 can be useful to make the output more easily machine-readable.
57
58 -a Includes non-current object versions / generations in the listing
59 (only useful with a versioning-enabled bucket). Also prints
60 generation and metageneration for each listed object.
61
62 -c Produce a grand total.
63
64 -e A pattern to exclude from reporting. Example: -e "*.o" would
65 exclude any object that ends in ".o". Can be specified multiple
66 times.
67
68 -h Prints object sizes in human-readable format (e.g., 1 KiB,
69 234 MiB, 2 GiB, etc.)
70
71 -s Display only a summary total for each argument.
72
73 -X Similar to -e, but excludes patterns from the given file. The
74 patterns to exclude should be one per line.
75
76
77 <B>EXAMPLES</B>
78 To list the size of all objects in a bucket:
79
80 gsutil du gs://bucketname
81
82 To list the size of all objects underneath a prefix:
83
84 gsutil du gs://bucketname/prefix/*
85
86 To print the total number of bytes in a bucket, in human-readable form:
87
88 gsutil du -ch gs://bucketname
89
90 To see a summary of the total bytes in the two given buckets:
91
92 gsutil du -s gs://bucket1 gs://bucket2
93
94 To list the size of all objects in a versioned bucket, including objects that
95 are not the latest:
96
97 gsutil du -a gs://bucketname
98
99 To list all objects in a bucket, except objects that end in ".bak",
100 with each object printed ending in a null byte:
101
102 gsutil du -e "*.bak" -0 gs://bucketname
103
104 To get the total size of each bucket in a project, plus a grand total for
105 the entire project:
106
107 gsutil -o GSUtil:default_project_id=project-name du -shc
108 """)
109
110
class DuCommand(Command):
  """Implementation of gsutil du command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'du',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='0ace:hsX:',
      file_url_ok=False,
      provider_url_ok=True,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='du',
      help_name_aliases=[],
      help_type='command_help',
      help_one_line_summary='Display object size usage',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def _PrintSummaryLine(self, num_bytes, name):
    """Writes one "<size> <name>" line to stdout.

    The size is rendered with MakeHumanReadable when -h was given, otherwise
    as a plain number. The line is terminated with self.line_ending.

    Args:
      num_bytes: Byte count to report.
      name: Label printed after the size (a URL string or 'total').
    """
    size_string = (MakeHumanReadable(num_bytes)
                   if self.human_readable else str(num_bytes))
    sys.stdout.write('%(size)-10s %(name)s%(ending)s' % {
        'size': size_string, 'name': name, 'ending': self.line_ending})

  def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref):
    """Prints the size line for one listed object and returns its counts.

    An object whose metadata carries S3_DELETE_MARKER_GUID is reported as a
    zero-byte '<DeleteMarker>' entry and does not count as an object. Nothing
    is printed when only summaries were requested (-s).

    Args:
      bucket_listing_ref: Listing ref whose root_object and url_string are
          reported.

    Returns:
      Tuple (number of objects, object size in bytes).
    """
    obj = bucket_listing_ref.root_object
    url_str = bucket_listing_ref.url_string
    if (obj.metadata and S3_DELETE_MARKER_GUID in
        obj.metadata.additionalProperties):
      size_string = '0'
      num_bytes = 0
      num_objs = 0
      url_str += '<DeleteMarker>'
    else:
      size_string = (MakeHumanReadable(obj.size)
                     if self.human_readable else str(obj.size))
      num_bytes = obj.size
      num_objs = 1

    if not self.summary_only:
      sys.stdout.write('%(size)-10s %(url)s%(ending)s' % {
          'size': size_string,
          'url': url_str.encode(UTF8),
          'ending': self.line_ending})

    return (num_objs, num_bytes)

  def _AddExcludePatternsFromLines(self, lines):
    """Appends each non-blank, stripped line to self.exclude_patterns.

    Args:
      lines: Iterable of text lines (an open file or sys.stdin).
    """
    for line in lines:
      pattern = line.strip()
      if pattern:
        self.exclude_patterns.append(pattern)

  def RunCommand(self):
    """Command entry point for the du command.

    Parses the sub-options, then expands every URL argument and prints
    per-object sizes, per-directory summaries and/or totals as requested.

    Returns:
      0 on success.

    Raises:
      CommandException: if a file URL is given, or if a wildcard URL matched
          no objects (only checked when no exclude patterns are in effect).
    """
    self.line_ending = '\n'
    self.all_versions = False
    self.produce_total = False
    self.human_readable = False
    self.summary_only = False
    self.exclude_patterns = []
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-0':
          self.line_ending = '\0'
        elif o == '-a':
          self.all_versions = True
        elif o == '-c':
          self.produce_total = True
        elif o == '-e':
          self.exclude_patterns.append(a)
        elif o == '-h':
          self.human_readable = True
        elif o == '-s':
          self.summary_only = True
        elif o == '-X':
          if a == '-':
            # stdin belongs to the process, not to this command: read the
            # patterns from it but leave it open.
            self._AddExcludePatternsFromLines(sys.stdin)
          else:
            with open(a, 'r') as exclude_file:
              self._AddExcludePatternsFromLines(exclude_file)

    if not self.args:
      # Default to listing all gs buckets.
      self.args = ['gs://']

    total_bytes = 0
    got_nomatch_errors = False

    def _PrintObjectLong(blr):
      return self._PrintInfoAboutBucketListingRef(blr)

    def _PrintNothing(unused_blr=None):
      pass

    def _PrintDirectory(num_bytes, name):
      if not self.summary_only:
        self._PrintSummaryLine(num_bytes, name)

    for url_arg in self.args:
      top_level_storage_url = StorageUrlFromString(url_arg)
      if top_level_storage_url.IsFileUrl():
        raise CommandException('Only cloud URLs are supported for %s'
                               % self.command_name)
      bucket_listing_fields = ['size']

      ls_helper = LsHelper(
          self.WildcardIterator, self.logger,
          print_object_func=_PrintObjectLong, print_dir_func=_PrintNothing,
          print_dir_header_func=_PrintNothing,
          print_dir_summary_func=_PrintDirectory,
          print_newline_func=_PrintNothing, all_versions=self.all_versions,
          should_recurse=True, exclude_patterns=self.exclude_patterns,
          fields=bucket_listing_fields)

      # ls_helper expands to objects and prefixes, so perform a top-level
      # expansion first.
      if top_level_storage_url.IsProvider():
        # Provider URL: use bucket wildcard to iterate over all buckets.
        top_level_iter = self.WildcardIterator(
            '%s://*' % top_level_storage_url.scheme).IterBuckets(
                bucket_fields=['id'])
      elif top_level_storage_url.IsBucket():
        top_level_iter = self.WildcardIterator(
            '%s://%s' % (top_level_storage_url.scheme,
                         top_level_storage_url.bucket_name)).IterBuckets(
                             bucket_fields=['id'])
      else:
        top_level_iter = [BucketListingObject(top_level_storage_url)]

      for blr in top_level_iter:
        storage_url = blr.storage_url
        if storage_url.IsBucket() and self.summary_only:
          # For a bucket summary, expand via a '**' wildcard URL instead of
          # the bucket URL itself.
          storage_url = StorageUrlFromString(
              storage_url.CreatePrefixUrl(wildcard_suffix='**'))
        _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url)
        if (storage_url.IsObject() and exp_objs == 0 and
            ContainsWildcard(url_arg) and not self.exclude_patterns):
          # Remember the miss but keep going; the error is raised only after
          # every argument has been processed.
          got_nomatch_errors = True
        total_bytes += exp_bytes

        if self.summary_only:
          self._PrintSummaryLine(exp_bytes, blr.url_string.rstrip('/'))

    if self.produce_total:
      self._PrintSummaryLine(total_bytes, 'total')

    if got_nomatch_errors:
      raise CommandException('One or more URLs matched no objects.')

    return 0
OLDNEW
« no previous file with comments | « third_party/gsutil/gslib/commands/defacl.py ('k') | third_party/gsutil/gslib/commands/hash.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698