OLD | NEW |
(Empty) | |
| 1 # -*- coding: utf-8 -*- |
| 2 # Copyright 2011 Google Inc. All Rights Reserved. |
| 3 # |
| 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 # you may not use this file except in compliance with the License. |
| 6 # You may obtain a copy of the License at |
| 7 # |
| 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 # |
| 10 # Unless required by applicable law or agreed to in writing, software |
| 11 # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 # See the License for the specific language governing permissions and |
| 14 # limitations under the License. |
| 15 """Implementation of Unix-like ls command for cloud storage providers.""" |
| 16 |
| 17 from __future__ import absolute_import |
| 18 |
| 19 import re |
| 20 |
| 21 from gslib.boto_translation import S3_DELETE_MARKER_GUID |
| 22 from gslib.cloud_api import NotFoundException |
| 23 from gslib.command import Command |
| 24 from gslib.command_argument import CommandArgument |
| 25 from gslib.cs_api_map import ApiSelector |
| 26 from gslib.exception import CommandException |
| 27 from gslib.ls_helper import LsHelper |
| 28 from gslib.storage_url import ContainsWildcard |
| 29 from gslib.storage_url import StorageUrlFromString |
| 30 from gslib.translation_helper import AclTranslation |
| 31 from gslib.util import ListingStyle |
| 32 from gslib.util import MakeHumanReadable |
| 33 from gslib.util import NO_MAX |
| 34 from gslib.util import PrintFullInfoAboutObject |
| 35 from gslib.util import UTF8 |
| 36 |
| 37 |
# Splits the string form of a JSON object timestamp, for example
#   2013-07-03 20:32:53.048000+00:00
# into a date (group 1) and a time of day (group 2). Group 2 stops at the
# first '.' or '+', so fractional seconds and the UTC offset are left out and
# the rest of the string is swallowed by the trailing '.*'. The long listing
# substitutes r'\1T\2Z' to produce ls-style output such as
#   2013-07-03T20:32:53Z
JSON_TIMESTAMP_RE = re.compile(r'(\S*)\s([^.+]*).*')
| 42 |
# Usage synopsis for the ls command. It is handed to the command spec as
# usage_synopsis and is also embedded at the top of the detailed help text.
# It lists every option accepted by the command's supported_sub_args.
_SYNOPSIS = """
  gsutil ls [-a] [-b] [-e] [-h] [-l] [-L] [-r] [-p proj_id] url...
"""

# Detailed help text for the ls command. The sample output shown for
# "ls -L -b" uses the same field labels that LsCommand._PrintBucketInfo prints.
_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """


<B>LISTING PROVIDERS, BUCKETS, SUBDIRECTORIES, AND OBJECTS</B>
  If you run gsutil ls without URLs, it lists all of the Google Cloud Storage
  buckets under your default project ID:

    gsutil ls

  (For details about projects, see "gsutil help projects" and also the -p
  option in the OPTIONS section below.)

  If you specify one or more provider URLs, gsutil ls will list buckets at
  each listed provider:

    gsutil ls gs://

  If you specify bucket URLs, gsutil ls will list objects at the top level of
  each bucket, along with the names of each subdirectory. For example:

    gsutil ls gs://bucket

  might produce output like:

    gs://bucket/obj1.htm
    gs://bucket/obj2.htm
    gs://bucket/images1/
    gs://bucket/images2/

  The "/" at the end of the last 2 URLs tells you they are subdirectories,
  which you can list using:

    gsutil ls gs://bucket/images*

  If you specify object URLs, gsutil ls will list the specified objects. For
  example:

    gsutil ls gs://bucket/*.txt

  will list all files whose name matches the above wildcard at the top level
  of the bucket.

  See "gsutil help wildcards" for more details on working with wildcards.


<B>DIRECTORY BY DIRECTORY, FLAT, and RECURSIVE LISTINGS</B>
  Listing a bucket or subdirectory (as illustrated near the end of the previous
  section) only shows the objects and names of subdirectories it contains. You
  can list all objects in a bucket by using the -r option. For example:

    gsutil ls -r gs://bucket

  will list the top-level objects and subdirectories, then the objects and
  subdirectories under gs://bucket/images1, then those under
  gs://bucket/images2, etc.

  If you want to see all objects in the bucket in one "flat" listing use the
  recursive ("**") wildcard, like:

    gsutil ls -r gs://bucket/**

  or, for a flat listing of a subdirectory:

    gsutil ls -r gs://bucket/dir/**


<B>LISTING OBJECT DETAILS</B>
  If you specify the -l option, gsutil will output additional information
  about each matching provider, bucket, subdirectory, or object. For example:

    gsutil ls -l gs://bucket/*.txt

  will print the object size, creation time stamp, and name of each matching
  object, along with the total count and sum of sizes of all matching objects:

    2276224 2012-03-02T19:25:17Z gs://bucket/obj1
    3914624 2012-03-02T19:30:27Z gs://bucket/obj2
    TOTAL: 2 objects, 6190848 bytes (5.9 MiB)

  Note that the total listed in parentheses above is in mebibytes (or gibibytes,
  tebibytes, etc.), which corresponds to the unit of billing measurement for
  Google Cloud Storage.

  You can get a listing of all the objects in the top-level bucket directory
  (along with the total count and sum of sizes) using a command like:

    gsutil ls -l gs://bucket

  To print additional detail about objects and buckets use the gsutil ls -L
  option. For example:

    gsutil ls -L gs://bucket/obj1

  will print something like:

    gs://bucket/obj1:
        Creation Time:      Fri, 02 Mar 2012 19:25:17 GMT
        Size:               2276224
        Cache-Control:      private, max-age=0
        Content-Type:       application/x-executable
        ETag:               5ca6796417570a586723b7344afffc81
        Generation:         1378862725952000
        Metageneration:     1
        ACL:
          [
            {
              "entity": "group-00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
              "entityId": "00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
              "role": "OWNER"
            }
          ]
    TOTAL: 1 objects, 2276224 bytes (2.17 MiB)

  See also "gsutil help acl" for getting a more readable version of the ACL.


<B>LISTING BUCKET DETAILS</B>
  If you want to see information about the bucket itself, use the -b
  option. For example:

    gsutil ls -L -b gs://bucket

  will print something like:

    gs://bucket/ :
        Storage class:              STANDARD
        Location constraint:        US
        Versioning enabled:         True
        Logging configuration:      None
        Website configuration:      None
        CORS configuration:         Present
        Lifecycle configuration:    None
        ACL:
          [
            {
              "entity": "group-00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
              "entityId": "00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
              "role": "OWNER"
            }
          ]
        Default ACL:
          [
            {
              "entity": "group-00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
              "entityId": "00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
              "role": "OWNER"
            }
          ]


<B>OPTIONS</B>
  -l          Prints long listing (size, timestamp, name).

  -L          Prints even more detail than -l. Note: If you use this option
              with the (non-default) XML API it will generate an additional
              request per object being listed, which makes the -L option run
              much more slowly (and cost more) using the XML API than the
              default JSON API.

  -b          Prints info about the bucket when used with a bucket URL.

  -h          When used with -l, prints object sizes in human readable format
              (e.g., 1 KiB, 234 MiB, 2 GiB, etc.)

  -p proj_id  Specifies the project ID to use for listing buckets.

  -R, -r      Requests a recursive listing.

  -a          Includes non-current object versions / generations in the listing
              (only useful with a versioning-enabled bucket). If combined with
              -l option also prints metageneration for each listed object.

  -e          Include ETag in long listing (-l) output.
""")
| 221 |
| 222 |
class LsCommand(Command):
  """Implementation of gsutil ls command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'ls',
      command_name_aliases=['dir', 'list'],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='aeblLhp:rR',
      file_url_ok=False,
      provider_url_ok=True,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()],
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='ls',
      help_name_aliases=['dir', 'list'],
      help_type='command_help',
      help_one_line_summary='List providers, buckets, or objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def _PrintBucketInfo(self, bucket_blr, listing_style):
    """Prints one bucket, as a bare line or as a block of configuration.

    For ListingStyle.SHORT and ListingStyle.LONG only the listing reference
    itself is printed. For any other style (ListingStyle.LONG_LONG in
    practice) the bucket's storage class, location, versioning, logging,
    website, CORS, lifecycle, ACL and default ACL are printed.

    Args:
      bucket_blr: BucketListingReference for the bucket being listed. For the
          detailed style its root_object must already be populated.
      listing_style: ListingStyle enum describing type of output desired.

    Returns:
      None; all output is written to stdout.
    """
    brief_style = (listing_style == ListingStyle.SHORT or
                   listing_style == ListingStyle.LONG)
    if brief_style:
      print(bucket_blr)
      return

    # Detailed listing: the caller guarantees that root_object is populated.
    bucket_msg = bucket_blr.root_object
    location = bucket_msg.location
    storage_class = bucket_msg.storageClass
    fields = {
        'bucket': bucket_blr.url_string,
        'storage_class': storage_class,
        'location_constraint': location,
        'acl': AclTranslation.JsonFromMessage(bucket_msg.acl),
        'default_acl': AclTranslation.JsonFromMessage(
            bucket_msg.defaultObjectAcl),
    }

    def _PresentOrNone(config):
      # Only report whether a configuration exists, not its contents.
      return 'Present' if config else 'None'

    fields['versioning'] = (bucket_msg.versioning and
                            bucket_msg.versioning.enabled)
    fields['website_config'] = _PresentOrNone(bucket_msg.website)
    fields['logging_config'] = _PresentOrNone(bucket_msg.logging)
    fields['cors_config'] = _PresentOrNone(bucket_msg.cors)
    fields['lifecycle_config'] = _PresentOrNone(bucket_msg.lifecycle)

    # Multiline string values (the JSON ACLs) are indented and made to start
    # on their own line so they sit under their label.
    for name, value in list(fields.items()):
      if isinstance(value, basestring) and '\n' in value:
        indented = value.replace('\n', '\n\t ')
        if not indented.startswith('\n'):
          indented = '\n\t ' + indented
        fields[name] = indented

    print('{bucket} :\n'
          '\tStorage class:\t\t\t{storage_class}\n'
          '\tLocation constraint:\t\t{location_constraint}\n'
          '\tVersioning enabled:\t\t{versioning}\n'
          '\tLogging configuration:\t\t{logging_config}\n'
          '\tWebsite configuration:\t\t{website_config}\n'
          '\tCORS configuration: \t\t{cors_config}\n'
          '\tLifecycle configuration:\t{lifecycle_config}\n'
          '\tACL:\t\t\t\t{acl}\n'
          '\tDefault ACL:\t\t\t{default_acl}'.format(**fields))
    if bucket_blr.storage_url.scheme == 's3':
      print('Note: this is an S3 bucket so configuration values may be '
            'blank. To retrieve bucket configuration values, use '
            'individual configuration commands such as gsutil acl get '
            '<bucket>.')

  def _PrintLongListing(self, bucket_listing_ref):
    """Prints one object in ListingStyle.LONG form.

    The line holds the size, the update timestamp and the URL, followed by
    the metageneration when self.all_versions is set and the ETag when
    self.include_etag is set. An object whose metadata carries the S3 delete
    marker is shown with size 0 and a '<DeleteMarker>' suffix.

    Args:
      bucket_listing_ref: Listing reference whose root_object is the object
          to print and whose url_string is the URL to display.

    Returns:
      Tuple (number of objects, number of bytes) to add to the running
      totals; (0, 0) for a delete marker.
    """
    obj_msg = bucket_listing_ref.root_object
    display_url = bucket_listing_ref.url_string
    is_delete_marker = (
        obj_msg.metadata and
        S3_DELETE_MARKER_GUID in obj_msg.metadata.additionalProperties)
    if is_delete_marker:
      size_string, num_bytes, num_objs = '0', 0, 0
      display_url += '<DeleteMarker>'
    else:
      if self.human_readable:
        size_string = MakeHumanReadable(obj_msg.size)
      else:
        size_string = str(obj_msg.size)
      num_bytes, num_objs = obj_msg.size, 1

    # Rewrite "<date> <time>.<fraction>+<offset>" as "<date>T<time>Z".
    timestamp = JSON_TIMESTAMP_RE.sub(
        r'\1T\2Z', str(obj_msg.updated).decode(UTF8).encode('ascii'))

    line_template = '%(size)10s %(timestamp)s %(url)s'
    metageneration_text = None
    etag_text = None
    if self.all_versions:
      line_template += ' metageneration=%(metageneration)s'
      metageneration_text = str(obj_msg.metageneration).encode(UTF8)
    if self.include_etag:
      line_template += ' etag=%(etag)s'
      etag_text = obj_msg.etag.encode(UTF8)

    print(line_template % {
        'size': size_string,
        'timestamp': timestamp,
        'url': display_url.encode(UTF8),
        'metageneration': metageneration_text,
        'etag': etag_text,
    })
    return (num_objs, num_bytes)

  def RunCommand(self):
    """Command entry point for the ls command.

    Parses the sub-options, then handles each URL argument according to its
    kind: a provider URL lists buckets, a bucket URL combined with -b prints
    bucket information, and anything else lists matching objects and
    subdirectories through LsHelper. With no arguments it lists 'gs://'.

    Returns:
      0 on success.

    Raises:
      CommandException: if a file URL is given, if the listing style is not
          recognized, or if an object URL matched no objects.
      NotFoundException: if a non-wildcard bucket URL given with -b matched
          no buckets.
    """
    listing_style = ListingStyle.SHORT
    get_bucket_info = False
    self.recursion_requested = False
    self.all_versions = False
    self.include_etag = False
    self.human_readable = False
    # Options are applied in command-line order, so the last of -l / -L wins.
    for flag, value in self.sub_opts or ():
      if flag == '-l':
        listing_style = ListingStyle.LONG
      elif flag == '-L':
        listing_style = ListingStyle.LONG_LONG
      elif flag == '-a':
        self.all_versions = True
      elif flag == '-b':
        get_bucket_info = True
      elif flag == '-e':
        self.include_etag = True
      elif flag == '-h':
        self.human_readable = True
      elif flag == '-p':
        self.project_id = value
      elif flag in ('-r', '-R'):
        self.recursion_requested = True

    if not self.args:
      # Default to listing all gs buckets.
      self.args = ['gs://']

    def _MaybePrintBucketHeader(blr):
      # A header is only printed when more than one URL was requested.
      if len(self.args) > 1:
        print('%s:' % blr.url_string.encode(UTF8))

    def _PrintPrefixLong(blr):
      # Pad with 33 spaces in place of the size and timestamp columns.
      print('%-33s%s' % ('', blr.url_string.encode(UTF8)))

    got_nomatch_errors = False
    got_bucket_nomatch_errors = False
    total_objs = 0
    total_bytes = 0

    for url_str in self.args:
      storage_url = StorageUrlFromString(url_str)
      if storage_url.IsFileUrl():
        raise CommandException('Only cloud URLs are supported for %s'
                               % self.command_name)

      # Bucket metadata fields to request, depending on the output detail.
      if (listing_style == ListingStyle.SHORT or
          listing_style == ListingStyle.LONG):
        bucket_fields = ['id']
      elif listing_style == ListingStyle.LONG_LONG:
        bucket_fields = ['location', 'storageClass', 'versioning', 'acl',
                         'defaultObjectAcl', 'website', 'logging', 'cors',
                         'lifecycle']
      else:
        bucket_fields = None

      if storage_url.IsProvider():
        # Provider URL: use bucket wildcard to list buckets.
        all_buckets = self.WildcardIterator('%s://*' % storage_url.scheme)
        for blr in all_buckets.IterBuckets(bucket_fields=bucket_fields):
          self._PrintBucketInfo(blr, listing_style)
        continue

      if storage_url.IsBucket() and get_bucket_info:
        # ls -b bucket listing request: List info about bucket(s).
        matched_any_bucket = False
        for blr in self.WildcardIterator(url_str).IterBuckets(
            bucket_fields=bucket_fields):
          if not ContainsWildcard(url_str) and not blr.root_object:
            # Iterator does not make an HTTP call for non-wildcarded
            # listings with fields=='id'. Ensure the bucket exists by calling
            # GetBucket.
            self.gsutil_api.GetBucket(
                blr.storage_url.bucket_name,
                fields=['id'], provider=storage_url.scheme)
          self._PrintBucketInfo(blr, listing_style)
          matched_any_bucket = True
        if not ContainsWildcard(url_str) and not matched_any_bucket:
          got_bucket_nomatch_errors = True
        continue

      # URL names a bucket, object, or object subdir ->
      # list matching object(s) / subdirs.
      if listing_style == ListingStyle.SHORT:
        # LsHelper's defaults already produce a short listing.
        style_kwargs = {}
      elif listing_style == ListingStyle.LONG:
        object_fields = ['name', 'updated', 'size']
        if self.all_versions:
          object_fields.extend(['generation', 'metageneration'])
        if self.include_etag:
          object_fields.append('etag')
        style_kwargs = {
            'print_object_func': self._PrintLongListing,
            'print_dir_func': _PrintPrefixLong,
            'fields': object_fields,
        }
      elif listing_style == ListingStyle.LONG_LONG:
        # fields=None requests all object fields.
        style_kwargs = {
            'print_object_func': PrintFullInfoAboutObject,
            'print_dir_func': _PrintPrefixLong,
            'fields': None,
        }
      else:
        raise CommandException('Unknown listing style: %s' % listing_style)

      ls_helper = LsHelper(self.WildcardIterator, self.logger,
                           all_versions=self.all_versions,
                           print_bucket_header_func=_MaybePrintBucketHeader,
                           should_recurse=self.recursion_requested,
                           **style_kwargs)

      exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url)
      if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
        got_nomatch_errors = True
      total_bytes += exp_bytes
      total_objs += exp_objs

    if total_objs and listing_style != ListingStyle.SHORT:
      print('TOTAL: %d objects, %d bytes (%s)' %
            (total_objs, total_bytes, MakeHumanReadable(float(total_bytes))))
    if got_nomatch_errors:
      raise CommandException('One or more URLs matched no objects.')
    if got_bucket_nomatch_errors:
      raise NotFoundException('One or more bucket URLs matched no buckets.')

    return 0
OLD | NEW |