OLD | NEW |
| (Empty) |
1 # -*- coding: utf-8 -*- | |
2 # Copyright 2013 Google Inc. All Rights Reserved. | |
3 # | |
4 # Licensed under the Apache License, Version 2.0 (the "License"); | |
5 # you may not use this file except in compliance with the License. | |
6 # You may obtain a copy of the License at | |
7 # | |
8 # http://www.apache.org/licenses/LICENSE-2.0 | |
9 # | |
10 # Unless required by applicable law or agreed to in writing, software | |
11 # distributed under the License is distributed on an "AS IS" BASIS, | |
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 # See the License for the specific language governing permissions and | |
14 # limitations under the License. | |
15 """Implementation of Unix-like du command for cloud storage providers.""" | |
16 | |
17 from __future__ import absolute_import | |
18 | |
19 import sys | |
20 | |
21 from gslib.boto_translation import S3_DELETE_MARKER_GUID | |
22 from gslib.bucket_listing_ref import BucketListingObject | |
23 from gslib.command import Command | |
24 from gslib.command_argument import CommandArgument | |
25 from gslib.cs_api_map import ApiSelector | |
26 from gslib.exception import CommandException | |
27 from gslib.ls_helper import LsHelper | |
28 from gslib.storage_url import ContainsWildcard | |
29 from gslib.storage_url import StorageUrlFromString | |
30 from gslib.util import MakeHumanReadable | |
31 from gslib.util import NO_MAX | |
32 from gslib.util import UTF8 | |
33 | |
# Usage line for the du command. It is passed as usage_synopsis in
# DuCommand.command_spec and spliced into the SYNOPSIS section of
# _DETAILED_HELP_TEXT below.
# NOTE(review): runs of whitespace appear collapsed in the copy of this file
# under review, so the indentation and column alignment inside these help
# strings should be confirmed against the canonical source -- TODO confirm.
_SYNOPSIS = """
gsutil du url...
"""

# Full help text for the du command (synopsis, description, options and
# examples). It is supplied as help_text in DuCommand.help_spec. The <B>...</B>
# markers delimit the section headings.
_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """


<B>DESCRIPTION</B>
The du command displays the amount of space (in bytes) being used by the
objects in the file or object hierarchy under a given URL. The syntax emulates
the Linux du command (which stands for disk usage). For example, the command:

gsutil du -s gs://your-bucket/dir

will report the total space used by all objects under gs://your-bucket/dir and
any sub-directories.


<B>OPTIONS</B>
-0 Ends each output line with a 0 byte rather than a newline. This
can be useful to make the output more easily machine-readable.

-a Includes non-current object versions / generations in the listing
(only useful with a versioning-enabled bucket). Also prints
generation and metageneration for each listed object.

-c Produce a grand total.

-e A pattern to exclude from reporting. Example: -e "*.o" would
exclude any object that ends in ".o". Can be specified multiple
times.

-h Prints object sizes in human-readable format (e.g., 1 KiB,
234 MiB, 2GiB, etc.)

-s Display only a summary total for each argument.

-X Similar to -e, but excludes patterns from the given file. The
patterns to exclude should be one per line.


<B>EXAMPLES</B>
To list the size of all objects in a bucket:

gsutil du gs://bucketname

To list the size of all objects underneath a prefix:

gsutil du gs://bucketname/prefix/*

To print the total number of bytes in a bucket, in human-readable form:

gsutil du -ch gs://bucketname

To see a summary of the total bytes in the two given buckets:

gsutil du -s gs://bucket1 gs://bucket2

To list the size of all objects in a versioned bucket, including objects that
are not the latest:

gsutil du -a gs://bucketname

To list all objects in a bucket, except objects that end in ".bak",
with each object printed ending in a null byte:

gsutil du -e "*.bak" -0 gs://bucketname

To get a total of all buckets in a project with a grand total for an entire
project:

gsutil -o GSUtil:default_project_id=project-name du -shc
""")
109 | |
110 | |
class DuCommand(Command):
  """Implementation of gsutil du command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'du',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='0ace:hsX:',
      file_url_ok=False,
      provider_url_ok=True,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='du',
      help_name_aliases=[],
      help_type='command_help',
      help_one_line_summary='Display object size usage',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def _FormatSize(self, num_bytes):
    """Returns num_bytes as text, honoring the -h (human readable) flag.

    Args:
      num_bytes: Byte count to format.

    Returns:
      MakeHumanReadable(num_bytes) when self.human_readable is set, otherwise
      str(num_bytes).
    """
    if self.human_readable:
      return MakeHumanReadable(num_bytes)
    return str(num_bytes)

  def _PrintSummaryLine(self, num_bytes, name):
    """Writes one size/name line to stdout, terminated by self.line_ending.

    Args:
      num_bytes: Byte count to report.
      name: Label printed after the size (a URL string, or 'total').
    """
    # NOTE(review): runs of whitespace appear collapsed in the copy of this
    # file under review; confirm the separator width in this format string
    # against the canonical source.
    sys.stdout.write('%(size)-10s %(name)s%(ending)s' % {
        'size': self._FormatSize(num_bytes),
        'name': name,
        'ending': self.line_ending})

  def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref):
    """Prints the size line for one listed object and returns its counts.

    Nothing is printed when self.summary_only is set; the counts are still
    returned so the caller can accumulate totals.

    Args:
      bucket_listing_ref: Listing ref whose root_object and url_string
          describe the object being reported.

    Returns:
      Tuple (number of objects, object size). A ref whose metadata carries
      S3_DELETE_MARKER_GUID is reported as size 0 and counted as (0, 0).
    """
    obj = bucket_listing_ref.root_object
    url_str = bucket_listing_ref.url_string
    if (obj.metadata and
        S3_DELETE_MARKER_GUID in obj.metadata.additionalProperties):
      # Delete markers occupy no space and are not counted as objects.
      size_string = '0'
      num_bytes = 0
      num_objs = 0
      url_str += '<DeleteMarker>'
    else:
      size_string = self._FormatSize(obj.size)
      num_bytes = obj.size
      num_objs = 1

    if not self.summary_only:
      # NOTE(review): separator width in this format string should be
      # confirmed against the canonical source (whitespace appears collapsed
      # in the reviewed copy).
      sys.stdout.write('%(size)-10s %(url)s%(ending)s' % {
          'size': size_string,
          'url': url_str.encode(UTF8),
          'ending': self.line_ending})

    return (num_objs, num_bytes)

  def _ReadExcludePatterns(self, path):
    """Reads exclude patterns, one per line, for the -X option.

    Args:
      path: Name of the file to read, or '-' to read standard input.

    Returns:
      List of the stripped, non-blank lines in input order.
    """
    if path == '-':
      # Standard input is not owned by this command, so it is read but
      # deliberately left open.
      lines = sys.stdin.readlines()
    else:
      with open(path, 'r') as pattern_file:
        lines = pattern_file.readlines()
    return [pattern for pattern in (line.strip() for line in lines) if pattern]

  def _ParseDuOptions(self):
    """Resets the du option attributes and applies self.sub_opts to them."""
    self.line_ending = '\n'
    self.all_versions = False
    self.produce_total = False
    self.human_readable = False
    self.summary_only = False
    self.exclude_patterns = []
    # self.sub_opts may be falsy when no flags were supplied.
    for flag, value in self.sub_opts or ():
      if flag == '-0':
        self.line_ending = '\0'
      elif flag == '-a':
        self.all_versions = True
      elif flag == '-c':
        self.produce_total = True
      elif flag == '-e':
        self.exclude_patterns.append(value)
      elif flag == '-h':
        self.human_readable = True
      elif flag == '-s':
        self.summary_only = True
      elif flag == '-X':
        self.exclude_patterns.extend(self._ReadExcludePatterns(value))

  def RunCommand(self):
    """Command entry point for the du command.

    Returns:
      0 once every URL argument has been processed.

    Raises:
      CommandException: if a file URL is given, or if a wildcard argument
          matched no objects while no exclude patterns were in effect.
    """
    self._ParseDuOptions()

    if not self.args:
      # Default to listing all gs buckets.
      self.args = ['gs://']

    total_bytes = 0
    got_nomatch_errors = False

    def _PrintNothing(unused_blr=None):
      pass

    def _PrintDirectory(num_bytes, name):
      # Per-directory subtotals are suppressed in summary (-s) mode.
      if not self.summary_only:
        self._PrintSummaryLine(num_bytes, name)

    for url_arg in self.args:
      top_level_storage_url = StorageUrlFromString(url_arg)
      if top_level_storage_url.IsFileUrl():
        raise CommandException('Only cloud URLs are supported for %s'
                               % self.command_name)

      ls_helper = LsHelper(
          self.WildcardIterator, self.logger,
          print_object_func=self._PrintInfoAboutBucketListingRef,
          print_dir_func=_PrintNothing,
          print_dir_header_func=_PrintNothing,
          print_dir_summary_func=_PrintDirectory,
          print_newline_func=_PrintNothing, all_versions=self.all_versions,
          should_recurse=True, exclude_patterns=self.exclude_patterns,
          fields=['size'])

      # ls_helper expands to objects and prefixes, so perform a top-level
      # expansion first.
      if top_level_storage_url.IsProvider():
        # Provider URL: use bucket wildcard to iterate over all buckets.
        top_level_iter = self.WildcardIterator(
            '%s://*' % top_level_storage_url.scheme).IterBuckets(
                bucket_fields=['id'])
      elif top_level_storage_url.IsBucket():
        top_level_iter = self.WildcardIterator(
            '%s://%s' % (top_level_storage_url.scheme,
                         top_level_storage_url.bucket_name)).IterBuckets(
                             bucket_fields=['id'])
      else:
        top_level_iter = [BucketListingObject(top_level_storage_url)]

      for blr in top_level_iter:
        storage_url = blr.storage_url
        if storage_url.IsBucket() and self.summary_only:
          # In summary mode a bucket is expanded through a '**' prefix URL
          # rather than listed directly.
          storage_url = StorageUrlFromString(
              storage_url.CreatePrefixUrl(wildcard_suffix='**'))
        _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url)
        if (storage_url.IsObject() and exp_objs == 0 and
            ContainsWildcard(url_arg) and not self.exclude_patterns):
          # Remember the miss but keep going so the remaining URLs are still
          # reported; the error is raised after all output is written.
          got_nomatch_errors = True
        total_bytes += exp_bytes

        if self.summary_only:
          self._PrintSummaryLine(exp_bytes, blr.url_string.rstrip('/'))

    if self.produce_total:
      self._PrintSummaryLine(total_bytes, 'total')

    if got_nomatch_errors:
      raise CommandException('One or more URLs matched no objects.')

    return 0
OLD | NEW |