OLD | NEW |
| (Empty) |
1 # -*- coding: utf-8 -*- | |
2 # Copyright 2011 Google Inc. All Rights Reserved. | |
3 # | |
4 # Licensed under the Apache License, Version 2.0 (the "License"); | |
5 # you may not use this file except in compliance with the License. | |
6 # You may obtain a copy of the License at | |
7 # | |
8 # http://www.apache.org/licenses/LICENSE-2.0 | |
9 # | |
10 # Unless required by applicable law or agreed to in writing, software | |
11 # distributed under the License is distributed on an "AS IS" BASIS, | |
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 # See the License for the specific language governing permissions and | |
14 # limitations under the License. | |
15 """Implementation of Unix-like ls command for cloud storage providers.""" | |
16 | |
17 from __future__ import absolute_import | |
18 | |
19 import re | |
20 | |
21 from gslib.boto_translation import S3_DELETE_MARKER_GUID | |
22 from gslib.cloud_api import NotFoundException | |
23 from gslib.command import Command | |
24 from gslib.command_argument import CommandArgument | |
25 from gslib.cs_api_map import ApiSelector | |
26 from gslib.exception import CommandException | |
27 from gslib.ls_helper import LsHelper | |
28 from gslib.storage_url import ContainsWildcard | |
29 from gslib.storage_url import StorageUrlFromString | |
30 from gslib.translation_helper import AclTranslation | |
31 from gslib.util import ListingStyle | |
32 from gslib.util import MakeHumanReadable | |
33 from gslib.util import NO_MAX | |
34 from gslib.util import PrintFullInfoAboutObject | |
35 from gslib.util import UTF8 | |
36 | |
37 | |
38 # Regex that assists with converting a JSON timestamp to ls-style output. | |
39 # The conversion drops fractional seconds and the UTC offset, for example: | |
40 # 2013-07-03 20:32:53.048000+00:00 becomes 2013-07-03T20:32:53Z | |
41 JSON_TIMESTAMP_RE = re.compile(r'([^\s]*)\s([^\.\+]*).*') | |
42 | |
43 _SYNOPSIS = """ | |
44 gsutil ls [-a] [-b] [-e] [-h] [-l] [-L] [-r] [-p proj_id] url... | |
45 """ | |
46 | |
47 _DETAILED_HELP_TEXT = (""" | |
48 <B>SYNOPSIS</B> | |
49 """ + _SYNOPSIS + """ | |
50 | |
51 | |
52 <B>LISTING PROVIDERS, BUCKETS, SUBDIRECTORIES, AND OBJECTS</B> | |
53 If you run gsutil ls without URLs, it lists all of the Google Cloud Storage | |
54 buckets under your default project ID: | |
55 | |
56 gsutil ls | |
57 | |
58 (For details about projects, see "gsutil help projects" and also the -p | |
59 option in the OPTIONS section below.) | |
60 | |
61 If you specify one or more provider URLs, gsutil ls will list buckets at | |
62 each listed provider: | |
63 | |
64 gsutil ls gs:// | |
65 | |
66 If you specify bucket URLs, gsutil ls will list objects at the top level of | |
67 each bucket, along with the names of each subdirectory. For example: | |
68 | |
69 gsutil ls gs://bucket | |
70 | |
71 might produce output like: | |
72 | |
73 gs://bucket/obj1.htm | |
74 gs://bucket/obj2.htm | |
75 gs://bucket/images1/ | |
76 gs://bucket/images2/ | |
77 | |
78 The "/" at the end of the last 2 URLs tells you they are subdirectories, | |
79 which you can list using: | |
80 | |
81 gsutil ls gs://bucket/images* | |
82 | |
83 If you specify object URLs, gsutil ls will list the specified objects. For | |
84 example: | |
85 | |
86 gsutil ls gs://bucket/*.txt | |
87 | |
88 will list all files whose name matches the above wildcard at the top level | |
89 of the bucket. | |
90 | |
91 See "gsutil help wildcards" for more details on working with wildcards. | |
92 | |
93 | |
94 <B>DIRECTORY BY DIRECTORY, FLAT, and RECURSIVE LISTINGS</B> | |
95 Listing a bucket or subdirectory (as illustrated near the end of the previous | |
96 section) only shows the objects and names of subdirectories it contains. You | |
97 can list all objects in a bucket by using the -r option. For example: | |
98 | |
99 gsutil ls -r gs://bucket | |
100 | |
101 will list the top-level objects and subdirectories, then the objects and | |
102 subdirectories under gs://bucket/images1, then those under gs://bucket/images2, etc. | |
103 | |
104 If you want to see all objects in the bucket in one "flat" listing use the | |
105 recursive ("**") wildcard, like: | |
106 | |
107 gsutil ls -r gs://bucket/** | |
108 | |
109 or, for a flat listing of a subdirectory: | |
110 | |
111 gsutil ls -r gs://bucket/dir/** | |
112 | |
113 | |
114 <B>LISTING OBJECT DETAILS</B> | |
115 If you specify the -l option, gsutil will output additional information | |
116 about each matching provider, bucket, subdirectory, or object. For example: | |
117 | |
118 gsutil ls -l gs://bucket/*.txt | |
119 | |
120 will print the object size, creation time stamp, and name of each matching | |
121 object, along with the total count and sum of sizes of all matching objects: | |
122 | |
123 2276224 2012-03-02T19:25:17Z gs://bucket/obj1 | |
124 3914624 2012-03-02T19:30:27Z gs://bucket/obj2 | |
125 TOTAL: 2 objects, 6190848 bytes (5.9 MiB) | |
126 | |
127 Note that the total listed in parentheses above is in mebibytes (or gibibytes, | |
128 tebibytes, etc.), which corresponds to the unit of billing measurement for | |
129 Google Cloud Storage. | |
130 | |
131 You can get a listing of all the objects in the top-level bucket directory | |
132 (along with the total count and sum of sizes) using a command like: | |
133 | |
134 gsutil ls -l gs://bucket | |
135 | |
136 To print additional detail about objects and buckets use the gsutil ls -L | |
137 option. For example: | |
138 | |
139 gsutil ls -L gs://bucket/obj1 | |
140 | |
141 will print something like: | |
142 | |
143 gs://bucket/obj1: | |
144 Creation Time: Fri, 02 Mar 2012 19:25:17 GMT | |
145 Size: 2276224 | |
146 Cache-Control: private, max-age=0 | |
147 Content-Type: application/x-executable | |
148 ETag: 5ca6796417570a586723b7344afffc81 | |
149 Generation: 1378862725952000 | |
150 Metageneration: 1 | |
151 ACL: | |
152 [ | |
153 { | |
154 "entity": "group-00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70", | |
155 "entityId": "00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70", | |
156 "role": "OWNER" | |
157 } | |
158 ] | |
159 TOTAL: 1 objects, 2276224 bytes (2.17 MiB) | |
160 | |
161 See also "gsutil help acl" for getting a more readable version of the ACL. | |
162 | |
163 | |
164 <B>LISTING BUCKET DETAILS</B> | |
165 If you want to see information about the bucket itself, use the -b | |
166 option. For example: | |
167 | |
168 gsutil ls -L -b gs://bucket | |
169 | |
170 will print something like: | |
171 | |
172 gs://bucket/ : | |
173 Storage class: STANDARD | |
174 Location constraint: US | |
175 Versioning enabled: True | |
176 Logging configuration: None | |
177 Website configuration: None | |
178 CORS configuration: Present | |
179 Lifecycle configuration: None | |
ACL: | |
180 [ | |
181 { | |
182 "entity": "group-00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70", | |
183 "entityId": "00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70", | |
184 "role": "OWNER" | |
185 } | |
186 ] | |
187 Default ACL: | |
188 [ | |
189 { | |
190 "entity": "group-00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70", | |
191 "entityId": "00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70", | |
192 "role": "OWNER" | |
193 } | |
194 ] | |
195 | |
196 | |
197 <B>OPTIONS</B> | |
198 -l Prints long listing (size, timestamp, URL). | |
199 | |
200 -L Prints even more detail than -l. Note: If you use this option | |
201 with the (non-default) XML API it will generate an additional | |
202 request per object being listed, which makes the -L option run | |
203 much more slowly (and cost more) using the XML API than the | |
204 default JSON API. | |
205 | |
206 -b Prints info about the bucket when used with a bucket URL. | |
207 | |
208 -h When used with -l, prints object sizes in human readable format | |
209 (e.g., 1 KiB, 234 MiB, 2 GiB, etc.) | |
210 | |
211 -p proj_id Specifies the project ID to use for listing buckets. | |
212 | |
213 -R, -r Requests a recursive listing. | |
214 | |
215 -a Includes non-current object versions / generations in the listing | |
216 (only useful with a versioning-enabled bucket). If combined with | |
217 -l option also prints metageneration for each listed object. | |
218 | |
219 -e Include ETag in long listing (-l) output. | |
220 """) | |
221 | |
222 | |
class LsCommand(Command):
  """Implementation of gsutil ls command.

  Depending on the URL arguments and options, lists the buckets at a provider,
  information about individual buckets (-b), or the objects and subdirectories
  matched by each URL, in short, long (-l) or full (-L) form.
  """

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'ls',
      command_name_aliases=['dir', 'list'],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='aeblLhp:rR',
      file_url_ok=False,
      provider_url_ok=True,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='ls',
      help_name_aliases=['dir', 'list'],
      help_type='command_help',
      help_one_line_summary='List providers, buckets, or objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def _PrintBucketInfo(self, bucket_blr, listing_style):
    """Print listing info for given bucket.

    For SHORT and LONG listings only the bucket reference itself is printed.
    For LONG_LONG listings the bucket's configuration summary is printed.

    Args:
      bucket_blr: BucketListingReference for the bucket being listed
      listing_style: ListingStyle enum describing type of output desired.

    Returns:
      None. All output is written to stdout.
    """
    if listing_style in (ListingStyle.SHORT, ListingStyle.LONG):
      print(bucket_blr)
      return
    # listing_style == ListingStyle.LONG_LONG:
    # We're guaranteed by the caller that the root object is populated.
    bucket = bucket_blr.root_object
    fields = {
        'bucket': bucket_blr.url_string,
        'storage_class': bucket.storageClass,
        'location_constraint': bucket.location,
        'acl': AclTranslation.JsonFromMessage(bucket.acl),
        'default_acl': AclTranslation.JsonFromMessage(
            bucket.defaultObjectAcl),
        'versioning': bucket.versioning and bucket.versioning.enabled,
        'website_config': 'Present' if bucket.website else 'None',
        'logging_config': 'Present' if bucket.logging else 'None',
        'cors_config': 'Present' if bucket.cors else 'None',
        'lifecycle_config': 'Present' if bucket.lifecycle else 'None',
    }

    # For field values that are multiline, add indenting to make it look
    # prettier. Only existing keys are reassigned, so the dict does not change
    # size while it is being walked.
    for key, previous_value in fields.items():
      if (not isinstance(previous_value, basestring) or
          '\n' not in previous_value):
        continue
      new_value = previous_value.replace('\n', '\n\t ')
      # Start multiline values on a new line if they aren't already.
      if not new_value.startswith('\n'):
        new_value = '\n\t ' + new_value
      fields[key] = new_value

    print('{bucket} :\n'
          '\tStorage class:\t\t\t{storage_class}\n'
          '\tLocation constraint:\t\t{location_constraint}\n'
          '\tVersioning enabled:\t\t{versioning}\n'
          '\tLogging configuration:\t\t{logging_config}\n'
          '\tWebsite configuration:\t\t{website_config}\n'
          '\tCORS configuration: \t\t{cors_config}\n'
          '\tLifecycle configuration:\t{lifecycle_config}\n'
          '\tACL:\t\t\t\t{acl}\n'
          '\tDefault ACL:\t\t\t{default_acl}'.format(**fields))
    if bucket_blr.storage_url.scheme == 's3':
      print('Note: this is an S3 bucket so configuration values may be '
            'blank. To retrieve bucket configuration values, use '
            'individual configuration commands such as gsutil acl get '
            '<bucket>.')

  def _PrintLongListing(self, bucket_listing_ref):
    """Prints an object with ListingStyle.LONG.

    Args:
      bucket_listing_ref: Listing reference whose root_object carries the
          object metadata (size, updated, and optionally metageneration and
          etag) to print.

    Returns:
      Tuple (number of objects, number of bytes) to add to the running
      totals. An S3 delete marker contributes (0, 0).
    """
    obj = bucket_listing_ref.root_object
    url_str = bucket_listing_ref.url_string
    if (obj.metadata and S3_DELETE_MARKER_GUID in
        obj.metadata.additionalProperties):
      size_string = '0'
      num_bytes = 0
      num_objs = 0
      url_str += '<DeleteMarker>'
    else:
      size_string = (MakeHumanReadable(obj.size)
                     if self.human_readable else str(obj.size))
      num_bytes = obj.size
      num_objs = 1

    timestamp = JSON_TIMESTAMP_RE.sub(
        r'\1T\2Z', str(obj.updated).decode(UTF8).encode('ascii'))
    printstr = '%(size)10s %(timestamp)s %(url)s'
    format_args = {
        'size': size_string,
        'timestamp': timestamp,
        'url': url_str.encode(UTF8),
    }
    # Optional columns: extend the format string and supply the matching
    # value only when the corresponding option was requested.
    if self.all_versions:
      printstr += ' metageneration=%(metageneration)s'
      format_args['metageneration'] = str(obj.metageneration).encode(UTF8)
    if self.include_etag:
      printstr += ' etag=%(etag)s'
      format_args['etag'] = obj.etag.encode(UTF8)
    print(printstr % format_args)
    return (num_objs, num_bytes)

  @staticmethod
  def _BucketFieldsForStyle(listing_style):
    """Returns a fresh list of bucket fields to request for a listing style."""
    if listing_style in (ListingStyle.SHORT, ListingStyle.LONG):
      return ['id']
    if listing_style == ListingStyle.LONG_LONG:
      return ['location', 'storageClass', 'versioning', 'acl',
              'defaultObjectAcl', 'website', 'logging', 'cors',
              'lifecycle']
    return None

  def _MaybePrintBucketHeader(self, blr):
    """Prints a 'url:' header line, but only when several URLs were given."""
    if len(self.args) > 1:
      print('%s:' % blr.url_string.encode(UTF8))

  def _PrintPrefixLong(self, blr):
    """Prints a subdirectory (prefix) line for long and full listings."""
    print('%-33s%s' % ('', blr.url_string.encode(UTF8)))

  def _CreateLsHelper(self, listing_style):
    """Builds the LsHelper that lists objects/subdirs in the given style.

    Args:
      listing_style: ListingStyle enum describing type of output desired.

    Returns:
      LsHelper configured with the print callbacks and object fields that the
      listing style needs.

    Raises:
      CommandException: if listing_style is not a known ListingStyle.
    """
    if listing_style == ListingStyle.SHORT:
      # ls helper by default readies us for a short listing.
      return LsHelper(self.WildcardIterator, self.logger,
                      all_versions=self.all_versions,
                      print_bucket_header_func=self._MaybePrintBucketHeader,
                      should_recurse=self.recursion_requested)

    if listing_style == ListingStyle.LONG:
      bucket_listing_fields = ['name', 'updated', 'size']
      if self.all_versions:
        bucket_listing_fields.extend(['generation', 'metageneration'])
      if self.include_etag:
        bucket_listing_fields.append('etag')
      return LsHelper(self.WildcardIterator, self.logger,
                      print_object_func=self._PrintLongListing,
                      print_dir_func=self._PrintPrefixLong,
                      print_bucket_header_func=self._MaybePrintBucketHeader,
                      all_versions=self.all_versions,
                      should_recurse=self.recursion_requested,
                      fields=bucket_listing_fields)

    if listing_style == ListingStyle.LONG_LONG:
      # fields=None: list all fields.
      return LsHelper(self.WildcardIterator, self.logger,
                      print_object_func=PrintFullInfoAboutObject,
                      print_dir_func=self._PrintPrefixLong,
                      print_bucket_header_func=self._MaybePrintBucketHeader,
                      all_versions=self.all_versions,
                      should_recurse=self.recursion_requested,
                      fields=None)

    raise CommandException('Unknown listing style: %s' % listing_style)

  def RunCommand(self):
    """Command entry point for the ls command.

    Returns:
      0 when every URL was listed without a no-match error.

    Raises:
      CommandException: if a file URL was given, or if an object URL matched
          no objects and no subdirectories.
      NotFoundException: if -b was given and a non-wildcard bucket URL matched
          no buckets.
    """
    got_nomatch_errors = False
    got_bucket_nomatch_errors = False
    listing_style = ListingStyle.SHORT
    get_bucket_info = False
    self.recursion_requested = False
    self.all_versions = False
    self.include_etag = False
    self.human_readable = False
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-e':
          self.include_etag = True
        elif o == '-b':
          get_bucket_info = True
        elif o == '-h':
          self.human_readable = True
        elif o == '-l':
          listing_style = ListingStyle.LONG
        elif o == '-L':
          listing_style = ListingStyle.LONG_LONG
        elif o == '-p':
          self.project_id = a
        elif o in ('-r', '-R'):
          self.recursion_requested = True

    if not self.args:
      # default to listing all gs buckets
      self.args = ['gs://']

    total_objs = 0
    total_bytes = 0

    for url_str in self.args:
      storage_url = StorageUrlFromString(url_str)
      if storage_url.IsFileUrl():
        raise CommandException('Only cloud URLs are supported for %s'
                               % self.command_name)
      bucket_fields = self._BucketFieldsForStyle(listing_style)
      if storage_url.IsProvider():
        # Provider URL: use bucket wildcard to list buckets.
        for blr in self.WildcardIterator(
            '%s://*' % storage_url.scheme).IterBuckets(
                bucket_fields=bucket_fields):
          self._PrintBucketInfo(blr, listing_style)
      elif storage_url.IsBucket() and get_bucket_info:
        # ls -b bucket listing request: List info about bucket(s).
        has_wildcard = ContainsWildcard(url_str)
        total_buckets = 0
        for blr in self.WildcardIterator(url_str).IterBuckets(
            bucket_fields=bucket_fields):
          if not has_wildcard and not blr.root_object:
            # Iterator does not make an HTTP call for non-wildcarded
            # listings with fields=='id'. Ensure the bucket exists by calling
            # GetBucket.
            self.gsutil_api.GetBucket(
                blr.storage_url.bucket_name,
                fields=['id'], provider=storage_url.scheme)
          self._PrintBucketInfo(blr, listing_style)
          total_buckets += 1
        if not has_wildcard and not total_buckets:
          got_bucket_nomatch_errors = True
      else:
        # URL names a bucket, object, or object subdir ->
        # list matching object(s) / subdirs.
        ls_helper = self._CreateLsHelper(listing_style)
        exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url)
        if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
          got_nomatch_errors = True
        total_bytes += exp_bytes
        total_objs += exp_objs

    if total_objs and listing_style != ListingStyle.SHORT:
      print('TOTAL: %d objects, %d bytes (%s)' %
            (total_objs, total_bytes, MakeHumanReadable(float(total_bytes))))
    # Errors are raised only after all URLs have been processed, so output
    # for the URLs that did match is still printed.
    if got_nomatch_errors:
      raise CommandException('One or more URLs matched no objects.')
    if got_bucket_nomatch_errors:
      raise NotFoundException('One or more bucket URLs matched no buckets.')

    return 0
OLD | NEW |