| OLD | NEW |
| 1 # -*- coding: utf-8 -*- |
| 1 # Copyright 2011 Google Inc. All Rights Reserved. | 2 # Copyright 2011 Google Inc. All Rights Reserved. |
| 2 # | 3 # |
| 3 # Licensed under the Apache License, Version 2.0 (the "License"); | 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 # you may not use this file except in compliance with the License. | 5 # you may not use this file except in compliance with the License. |
| 5 # You may obtain a copy of the License at | 6 # You may obtain a copy of the License at |
| 6 # | 7 # |
| 7 # http://www.apache.org/licenses/LICENSE-2.0 | 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 # | 9 # |
| 9 # Unless required by applicable law or agreed to in writing, software | 10 # Unless required by applicable law or agreed to in writing, software |
| 10 # distributed under the License is distributed on an "AS IS" BASIS, | 11 # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 # See the License for the specific language governing permissions and | 13 # See the License for the specific language governing permissions and |
| 13 # limitations under the License. | 14 # limitations under the License. |
| 15 """Implementation of Unix-like ls command for cloud storage providers.""" |
| 16 |
| 17 from __future__ import absolute_import |
| 14 | 18 |
| 15 import re | 19 import re |
| 16 | 20 |
| 17 from boto.s3.deletemarker import DeleteMarker | 21 from gslib.boto_translation import S3_DELETE_MARKER_GUID |
| 18 from gslib.bucket_listing_ref import BucketListingRef | 22 from gslib.cloud_api import NotFoundException |
| 19 from gslib.command import Command | 23 from gslib.command import Command |
| 20 from gslib.command import COMMAND_NAME | 24 from gslib.cs_api_map import ApiSelector |
| 21 from gslib.command import COMMAND_NAME_ALIASES | |
| 22 from gslib.command import FILE_URIS_OK | |
| 23 from gslib.command import MAX_ARGS | |
| 24 from gslib.command import MIN_ARGS | |
| 25 from gslib.command import PROVIDER_URIS_OK | |
| 26 from gslib.command import SUPPORTED_SUB_ARGS | |
| 27 from gslib.command import URIS_START_ARG | |
| 28 from gslib.exception import CommandException | 25 from gslib.exception import CommandException |
| 29 from gslib.help_provider import HELP_NAME | 26 from gslib.ls_helper import LsHelper |
| 30 from gslib.help_provider import HELP_NAME_ALIASES | 27 from gslib.storage_url import ContainsWildcard |
| 31 from gslib.help_provider import HELP_ONE_LINE_SUMMARY | 28 from gslib.storage_url import StorageUrlFromString |
| 32 from gslib.help_provider import HELP_TEXT | 29 from gslib.translation_helper import AclTranslation |
| 33 from gslib.help_provider import HelpType | |
| 34 from gslib.help_provider import HELP_TYPE | |
| 35 from gslib.plurality_checkable_iterator import PluralityCheckableIterator | |
| 36 from gslib.util import ListingStyle | 30 from gslib.util import ListingStyle |
| 37 from gslib.util import MakeHumanReadable | 31 from gslib.util import MakeHumanReadable |
| 38 from gslib.util import PrintFullInfoAboutUri | |
| 39 from gslib.util import NO_MAX | 32 from gslib.util import NO_MAX |
| 40 from gslib.wildcard_iterator import ContainsWildcard | 33 from gslib.util import PrintFullInfoAboutObject |
| 41 import boto | 34 from gslib.util import UTF8 |
| 42 | 35 |
| 43 TIMESTAMP_RE = re.compile(r'(.*)\.[0-9]*Z') | |
| 44 | 36 |
| 45 _detailed_help_text = (""" | 37 # Regex that assists with converting JSON timestamp to ls-style output. |
| 38 # This excludes timestamp fractional seconds, for example: |
| 39 # 2013-07-03 20:32:53.048000+00:00 |
| 40 JSON_TIMESTAMP_RE = re.compile(r'([^\s]*)\s([^\.\+]*).*') |
| 41 |
| 42 _DETAILED_HELP_TEXT = (""" |
| 46 <B>SYNOPSIS</B> | 43 <B>SYNOPSIS</B> |
| 47 gsutil ls [-a] [-b] [-l] [-L] [-R] [-p proj_id] uri... | 44 gsutil ls [-a] [-b] [-l] [-L] [-R] [-p proj_id] url... |
| 48 | 45 |
| 49 | 46 |
| 50 <B>LISTING PROVIDERS, BUCKETS, SUBDIRECTORIES, AND OBJECTS</B> | 47 <B>LISTING PROVIDERS, BUCKETS, SUBDIRECTORIES, AND OBJECTS</B> |
| 51 If you run gsutil ls without URIs, it lists all of the Google Cloud Storage | 48 If you run gsutil ls without URLs, it lists all of the Google Cloud Storage |
| 52 buckets under your default project ID: | 49 buckets under your default project ID: |
| 53 | 50 |
| 54 gsutil ls | 51 gsutil ls |
| 55 | 52 |
| 56 (For details about projects, see "gsutil help projects" and also the -p | 53 (For details about projects, see "gsutil help projects" and also the -p |
| 57 option in the OPTIONS section below.) | 54 option in the OPTIONS section below.) |
| 58 | 55 |
| 59 If you specify one or more provider URIs, gsutil ls will list buckets at | 56 If you specify one or more provider URLs, gsutil ls will list buckets at |
| 60 each listed provider: | 57 each listed provider: |
| 61 | 58 |
| 62 gsutil ls gs:// | 59 gsutil ls gs:// |
| 63 | 60 |
| 64 If you specify bucket URIs, gsutil ls will list objects at the top level of | 61 If you specify bucket URLs, gsutil ls will list objects at the top level of |
| 65 each bucket, along with the names of each subdirectory. For example: | 62 each bucket, along with the names of each subdirectory. For example: |
| 66 | 63 |
| 67 gsutil ls gs://bucket | 64 gsutil ls gs://bucket |
| 68 | 65 |
| 69 might produce output like: | 66 might produce output like: |
| 70 | 67 |
| 71 gs://bucket/obj1.htm | 68 gs://bucket/obj1.htm |
| 72 gs://bucket/obj2.htm | 69 gs://bucket/obj2.htm |
| 73 gs://bucket/images1/ | 70 gs://bucket/images1/ |
| 74 gs://bucket/images2/ | 71 gs://bucket/images2/ |
| 75 | 72 |
| 76 The "/" at the end of the last 2 URIs tells you they are subdirectories, | 73 The "/" at the end of the last 2 URLs tells you they are subdirectories, |
| 77 which you can list using: | 74 which you can list using: |
| 78 | 75 |
| 79 gsutil ls gs://bucket/images* | 76 gsutil ls gs://bucket/images* |
| 80 | 77 |
| 81 If you specify object URIs, gsutil ls will list the specified objects. For | 78 If you specify object URLs, gsutil ls will list the specified objects. For |
| 82 example: | 79 example: |
| 83 | 80 |
| 84 gsutil ls gs://bucket/*.txt | 81 gsutil ls gs://bucket/*.txt |
| 85 | 82 |
| 86 will list all files whose name matches the above wildcard at the top level | 83 will list all files whose name matches the above wildcard at the top level |
| 87 of the bucket. | 84 of the bucket. |
| 88 | 85 |
| 89 See "gsutil help wildcards" for more details on working with wildcards. | 86 See "gsutil help wildcards" for more details on working with wildcards. |
| 90 | 87 |
| 91 | 88 |
| (...skipping 47 matching lines...) |
| 139 will print something like: | 136 will print something like: |
| 140 | 137 |
| 141 gs://bucket/obj1: | 138 gs://bucket/obj1: |
| 142 Creation Time: Fri, 02 Mar 2012 19:25:17 GMT | 139 Creation Time: Fri, 02 Mar 2012 19:25:17 GMT |
| 143 Size: 2276224 | 140 Size: 2276224 |
| 144 Cache-Control: private, max-age=0 | 141 Cache-Control: private, max-age=0 |
| 145 Content-Type: application/x-executable | 142 Content-Type: application/x-executable |
| 146 ETag: 5ca6796417570a586723b7344afffc81 | 143 ETag: 5ca6796417570a586723b7344afffc81 |
| 147 Generation: 1378862725952000 | 144 Generation: 1378862725952000 |
| 148 Metageneration: 1 | 145 Metageneration: 1 |
| 149 ACL: <Owner:00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70, <<UserById: 00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70>: u'FULL_CONTROL'>> | 146 ACL: |
| 147 [ |
| 148 { |
| 149 "entity": "group-00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70", |
| 150 "entityId": "00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70", |
| 151 "role": "OWNER" |
| 152 } |
| 153 ] |
| 150 TOTAL: 1 objects, 2276224 bytes (2.17 MB) | 154 TOTAL: 1 objects, 2276224 bytes (2.17 MB) |
| 151 | 155 |
| 152 Note that the -L option is slower and more costly to use than the -l option, | |
| 153 because it makes a bucket listing request followed by a HEAD request for | |
| 154 each individual object (rather than just parsing the information it needs | |
| 155 out of a single bucket listing, the way the -l option does). | |
| 156 | |
| 157 See also "gsutil help acl" for getting a more readable version of the ACL. | 156 See also "gsutil help acl" for getting a more readable version of the ACL. |
| 158 | 157 |
| 159 | 158 |
| 160 <B>LISTING BUCKET DETAILS</B> | 159 <B>LISTING BUCKET DETAILS</B> |
| 161 If you want to see information about the bucket itself, use the -b | 160 If you want to see information about the bucket itself, use the -b |
| 162 option. For example: | 161 option. For example: |
| 163 | 162 |
| 164 gsutil ls -L -b gs://bucket | 163 gsutil ls -L -b gs://bucket |
| 165 | 164 |
| 166 will print something like: | 165 will print something like: |
| 167 | 166 |
| 168 gs://bucket/ : | 167 gs://bucket/ : |
| 169 StorageClass: STANDARD | 168 StorageClass: STANDARD |
| 170 LocationConstraint: US | 169 LocationConstraint: US |
| 171 Versioning enabled: True | 170 Versioning enabled: True |
| 172 Logging: False | 171 Logging: None |
| 173 WebsiteConfiguration: False | 172 WebsiteConfiguration: None |
| 174 ACL: <Owner:00b4903a9740e42c29800f53bd5a9a62a2f96eb3f64a4313a115df3f3a776bf7, <<GroupById: 00b4903a9740e42c29800f53bd5a9a62a2f96eb3f64a4313a115df3f3a776bf7>: u'FULL_CONTROL'>> | 173 CORS configuration: Present |
| 175 Default ACL: <> | 174 Lifecycle configuration: None |
| 175 [ |
| 176 { |
| 177 "entity": "group-00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70", |
| 178 "entityId": "00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70", |
| 179 "role": "OWNER" |
| 180 } |
| 181 ] |
| 182 Default ACL: |
| 183 [ |
| 184 { |
| 185 "entity": "group-00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70", |
| 186 "entityId": "00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70", |
| 187 "role": "OWNER" |
| 188 } |
| 189 ] |
| 176 | 190 |
| 177 | 191 |
| 178 <B>OPTIONS</B> | 192 <B>OPTIONS</B> |
| 179 -l Prints long listing (owner, length). | 193 -l Prints long listing (owner, length). |
| 180 | 194 |
| 181 -L Prints even more detail than -l. This is a separate option because | 195 -L Prints even more detail than -l. This is a separate option because |
| 182 it makes additional service requests (so, takes longer and adds | 196 it makes additional service requests (so, takes longer and adds |
| 183 requests costs). | 197 requests costs). |
| 184 | 198 |
| 185 -b Prints info about the bucket when used with a bucket URI. | 199 -b Prints info about the bucket when used with a bucket URL. |
| 186 | 200 |
| 187 -h When used with -l, prints object sizes in human readable format | 201 -h When used with -l, prints object sizes in human readable format |
| 188 (e.g., 1KB, 234MB, 2GB, etc.) | 202 (e.g., 1KB, 234MB, 2GB, etc.) |
| 189 | 203 |
| 190 -p proj_id Specifies the project ID to use for listing buckets. | 204 -p proj_id Specifies the project ID to use for listing buckets. |
| 191 | 205 |
| 192 -R, -r Requests a recursive listing. | 206 -R, -r Requests a recursive listing. |
| 193 | 207 |
| 194 -a Includes non-current object versions / generations in the listing | 208 -a Includes non-current object versions / generations in the listing |
| 195 (only useful with a versioning-enabled bucket). If combined with | 209 (only useful with a versioning-enabled bucket). If combined with |
| 196 -l option also prints metageneration for each listed object. | 210 -l option also prints metageneration for each listed object. |
| 197 | 211 |
| 198 -e Include ETag in long listing (-l) output. | 212 -e Include ETag in long listing (-l) output. |
| 199 """) | 213 """) |
| 200 | 214 |
| 215 |
| 201 class LsCommand(Command): | 216 class LsCommand(Command): |
| 202 """Implementation of gsutil ls command.""" | 217 """Implementation of gsutil ls command.""" |
| 203 | 218 |
| 204 # Command specification (processed by parent class). | 219 # Command specification. See base class for documentation. |
| 205 command_spec = { | 220 command_spec = Command.CreateCommandSpec( |
| 206 # Name of command. | 221 'ls', |
| 207 COMMAND_NAME : 'ls', | 222 command_name_aliases=['dir', 'list'], |
| 208 # List of command name aliases. | 223 min_args=0, |
| 209 COMMAND_NAME_ALIASES : ['dir', 'list'], | 224 max_args=NO_MAX, |
| 210 # Min number of args required by this command. | 225 supported_sub_args='aeblLhp:rR', |
| 211 MIN_ARGS : 0, | 226 file_url_ok=False, |
| 212 # Max number of args required by this command, or NO_MAX. | 227 provider_url_ok=True, |
| 213 MAX_ARGS : NO_MAX, | 228 urls_start_arg=0, |
| 214 # Getopt-style string specifying acceptable sub args. | 229 gs_api_support=[ApiSelector.XML, ApiSelector.JSON], |
| 215 SUPPORTED_SUB_ARGS : 'aeblLhp:rR', | 230 gs_default_api=ApiSelector.JSON, |
| 216 # True if file URIs acceptable for this command. | 231 ) |
| 217 FILE_URIS_OK : False, | 232 # Help specification. See help_provider.py for documentation. |
| 218 # True if provider-only URIs acceptable for this command. | 233 help_spec = Command.HelpSpec( |
| 219 PROVIDER_URIS_OK : True, | 234 help_name='ls', |
| 220 # Index in args of first URI arg. | 235 help_name_aliases=['dir', 'list'], |
| 221 URIS_START_ARG : 0, | 236 help_type='command_help', |
| 222 } | 237 help_one_line_summary='List providers, buckets, or objects', |
| 223 help_spec = { | 238 help_text=_DETAILED_HELP_TEXT, |
| 224 # Name of command or auxiliary help info for which this help applies. | 239 subcommand_help_text={}, |
| 225 HELP_NAME : 'ls', | 240 ) |
| 226 # List of help name aliases. | |
| 227 HELP_NAME_ALIASES : ['dir', 'list'], | |
| 228 # Type of help: | |
| 229 HELP_TYPE : HelpType.COMMAND_HELP, | |
| 230 # One line summary of this help. | |
| 231 HELP_ONE_LINE_SUMMARY : 'List providers, buckets, or objects', | |
| 232 # The full help text. | |
| 233 HELP_TEXT : _detailed_help_text, | |
| 234 } | |
| 235 | 241 |
| 236 def _PrintBucketInfo(self, bucket_uri, listing_style): | 242 def _PrintBucketInfo(self, bucket_blr, listing_style): |
| 237 """Print listing info for given bucket. | 243 """Print listing info for given bucket. |
| 238 | 244 |
| 239 Args: | 245 Args: |
| 240 bucket_uri: StorageUri being listed. | 246 bucket_blr: BucketListingReference for the bucket being listed |
| 241 listing_style: ListingStyle enum describing type of output desired. | 247 listing_style: ListingStyle enum describing type of output desired. |
| 248 |
| 249 Returns: |
| 250 None. The bucket information is printed to stdout. |
| 242 """ | 251 """ |
| 243 if (listing_style == ListingStyle.SHORT or | 252 if (listing_style == ListingStyle.SHORT or |
| 244 listing_style == ListingStyle.LONG): | 253 listing_style == ListingStyle.LONG): |
| 245 print bucket_uri | 254 print bucket_blr |
| 246 return | 255 return |
| 256 # listing_style == ListingStyle.LONG_LONG: |
| 257 # We're guaranteed by the caller that the root object is populated. |
| 258 bucket = bucket_blr.root_object |
| 259 location_constraint = bucket.location |
| 260 storage_class = bucket.storageClass |
| 261 fields = {'bucket': bucket_blr.url_string, |
| 262 'storage_class': storage_class, |
| 263 'location_constraint': location_constraint, |
| 264 'acl': AclTranslation.JsonFromMessage(bucket.acl), |
| 265 'default_acl': AclTranslation.JsonFromMessage( |
| 266 bucket.defaultObjectAcl)} |
| 247 | 267 |
| 248 location_constraint = bucket_uri.get_location(validate=False, | 268 fields['versioning'] = bucket.versioning and bucket.versioning.enabled |
| 249 headers=self.headers) | 269 fields['website_config'] = 'Present' if bucket.website else 'None' |
| 250 storage_class = bucket_uri.get_storage_class(validate=False, | 270 fields['logging_config'] = 'Present' if bucket.logging else 'None' |
| 251 headers=self.headers) | 271 fields['cors_config'] = 'Present' if bucket.cors else 'None' |
| 252 self.proj_id_handler.FillInProjectHeaderIfNeeded( | 272 fields['lifecycle_config'] = 'Present' if bucket.lifecycle else 'None' |
| 253 'get_acl', bucket_uri, self.headers) | 273 |
| 254 fields = { | 274 # For field values that are multiline, add indenting to make it look |
| 255 'bucket': bucket_uri, | 275 # prettier. |
| 256 'storage_class': storage_class, | 276 for key in fields: |
| 257 'location_constraint': location_constraint or 'None', | 277 previous_value = fields[key] |
| 258 'versioning': bucket_uri.get_versioning_config(self.headers), | 278 if (not isinstance(previous_value, basestring) or |
| 259 'acl': bucket_uri.get_acl(False, self.headers), | 279 '\n' not in previous_value): |
| 260 'default_acl': bucket_uri.get_def_acl(False, self.headers), | 280 continue |
| 261 } | 281 new_value = previous_value.replace('\n', '\n\t ') |
| 262 # For other configuration fields, just show them as "Present/None". | 282 # Start multiline values on a new line if they aren't already. |
| 263 # website_config is a dictionary of {"WebsiteConfiguration": config} | 283 if not new_value.startswith('\n'): |
| 264 website_config = bucket_uri.get_website_config(self.headers) | 284 new_value = '\n\t ' + new_value |
| 265 fields["website_config"] = ( | 285 fields[key] = new_value |
| 266 "Present" if website_config["WebsiteConfiguration"] else "None") | |
| 267 # logging_config is a dictionary of {"Logging": config} | |
| 268 logging_config = bucket_uri.get_logging_config(self.headers) | |
| 269 fields["logging_config"] = ( | |
| 270 "Present" if logging_config["Logging"] else "None") | |
| 271 # cors_config wraps a list of cors | |
| 272 cors_config = bucket_uri.get_cors(self.headers) | |
| 273 fields["cors_config"] = "Present" if cors_config.cors else "None" | |
| 274 # lifecycle_config is a list itself | |
| 275 lifecycle_config = bucket_uri.get_lifecycle_config(self.headers) | |
| 276 fields["lifecycle_config"] = ( | |
| 277 "Present" if lifecycle_config else "None") | |
| 278 | 286 |
| 279 print('{bucket} :\n' | 287 print('{bucket} :\n' |
| 280 '\tStorage class:\t\t\t{storage_class}\n' | 288 '\tStorage class:\t\t\t{storage_class}\n' |
| 281 '\tLocation constraint:\t\t{location_constraint}\n' | 289 '\tLocation constraint:\t\t{location_constraint}\n' |
| 282 '\tVersioning enabled:\t\t{versioning}\n' | 290 '\tVersioning enabled:\t\t{versioning}\n' |
| 283 '\tLogging configuration:\t\t{logging_config}\n' | 291 '\tLogging configuration:\t\t{logging_config}\n' |
| 284 '\tWebsite configuration:\t\t{website_config}\n' | 292 '\tWebsite configuration:\t\t{website_config}\n' |
| 285 '\tCORS configuration: \t\t{cors_config}\n' | 293 '\tCORS configuration: \t\t{cors_config}\n' |
| 286 '\tLifecycle configuration:\t{lifecycle_config}\n' | 294 '\tLifecycle configuration:\t{lifecycle_config}\n' |
| 287 '\tACL:\t\t\t\t{acl}\n' | 295 '\tACL:\t\t\t\t{acl}\n' |
| 288 '\tDefault ACL:\t\t\t{default_acl}'.format(**fields)) | 296 '\tDefault ACL:\t\t\t{default_acl}'.format(**fields)) |
| 297 if bucket_blr.storage_url.scheme == 's3': |
| 298 print('Note: this is an S3 bucket so configuration values may be ' |
| 299 'blank. To retrieve bucket configuration values, use ' |
| 300 'individual configuration commands such as gsutil acl get ' |
| 301 '<bucket>.') |
| 289 | 302 |
| 290 def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref, listing_style): | 303 def _PrintLongListing(self, bucket_listing_ref): |
| 291 """Print listing info for given bucket_listing_ref. | 304 """Prints an object with ListingStyle.LONG.""" |
| 305 obj = bucket_listing_ref.root_object |
| 306 url_str = bucket_listing_ref.url_string |
| 307 if (obj.metadata and S3_DELETE_MARKER_GUID in |
| 308 obj.metadata.additionalProperties): |
| 309 size_string = '0' |
| 310 num_bytes = 0 |
| 311 num_objs = 0 |
| 312 url_str += '<DeleteMarker>' |
| 313 else: |
| 314 size_string = (MakeHumanReadable(obj.size) |
| 315 if self.human_readable else str(obj.size)) |
| 316 num_bytes = obj.size |
| 317 num_objs = 1 |
| 292 | 318 |
| 293 Args: | 319 timestamp = JSON_TIMESTAMP_RE.sub( |
| 294 bucket_listing_ref: BucketListing being listed. | 320 r'\1T\2Z', str(obj.updated).decode(UTF8).encode('ascii')) |
| 295 listing_style: ListingStyle enum describing type of output desired. | 321 printstr = '%(size)10s %(timestamp)s %(url)s' |
| 296 | 322 encoded_etag = None |
| 297 Returns: | 323 encoded_metagen = None |
| 298 Tuple (number of objects, | 324 if self.all_versions: |
| 299 object length, if listing_style is one of the long listing formats) | 325 printstr += ' metageneration=%(metageneration)s' |
| 300 | 326 encoded_metagen = str(obj.metageneration).encode(UTF8) |
| 301 Raises: | 327 if self.include_etag: |
| 302 Exception: if calling bug encountered. | 328 printstr += ' etag=%(etag)s' |
| 303 """ | 329 encoded_etag = obj.etag.encode(UTF8) |
| 304 uri = bucket_listing_ref.GetUri() | 330 format_args = { |
| 305 obj = bucket_listing_ref.GetKey() | 331 'size': size_string, |
| 306 uri_str = UriStrForObj(uri, obj, self.all_versions) | 332 'timestamp': timestamp, |
| 307 if listing_style == ListingStyle.SHORT: | 333 'url': url_str.encode(UTF8), |
| 308 print uri_str.encode('utf-8') | 334 'metageneration': encoded_metagen, |
| 309 return (1, 0) | 335 'etag': encoded_etag |
| 310 elif listing_style == ListingStyle.LONG: | 336 } |
| 311 # Exclude timestamp fractional secs (example: 2010-08-23T12:46:54.187Z). | 337 print printstr % format_args |
| 312 timestamp = TIMESTAMP_RE.sub( | |
| 313 r'\1Z', obj.last_modified.decode('utf8').encode('ascii')) | |
| 314 | |
| 315 if isinstance(obj, DeleteMarker): | |
| 316 size_string = '0' | |
| 317 numbytes = 0 | |
| 318 numobjs = 0 | |
| 319 else: | |
| 320 size_string = (MakeHumanReadable(obj.size) | |
| 321 if self.human_readable else str(obj.size)) | |
| 322 numbytes = obj.size | |
| 323 numobjs = 1 | |
| 324 | |
| 325 printstr = '%(size)10s %(timestamp)s %(uri)s' | |
| 326 if self.all_versions and hasattr(obj, 'metageneration'): | |
| 327 printstr += ' metageneration=%(metageneration)s' | |
| 328 if self.include_etag: | |
| 329 printstr += ' etag=%(etag)s' | |
| 330 format_args = { | |
| 331 'size': size_string, | |
| 332 'timestamp': timestamp, | |
| 333 'uri': uri_str.encode('utf-8'), | |
| 334 'metageneration': str(getattr(obj, 'metageneration', '')), | |
| 335 'etag': obj.etag.encode('utf-8'), | |
| 336 } | |
| 337 print printstr % format_args | |
| 338 return (numobjs, numbytes) | |
| 339 elif listing_style == ListingStyle.LONG_LONG: | |
| 340 return PrintFullInfoAboutUri(uri, True, self.headers) | |
| 341 else: | |
| 342 raise Exception('Unexpected ListingStyle(%s)' % listing_style) | |
| 343 | |
| 344 def _ExpandUriAndPrintInfo(self, uri, listing_style, should_recurse=False): | |
| 345 """ | |
| 346 Expands wildcards and directories/buckets for uri as needed, and | |
| 347 calls _PrintInfoAboutBucketListingRef() on each. | |
| 348 | |
| 349 Args: | |
| 350 uri: StorageUri being listed. | |
| 351 listing_style: ListingStyle enum describing type of output desired. | |
| 352 should_recurse: bool indicator of whether to expand recursively. | |
| 353 | |
| 354 Returns: | |
| 355 Tuple (number of matching objects, number of bytes across these objects). | |
| 356 """ | |
| 357 # We do a two-level loop, with the outer loop iterating level-by-level from | |
| 358 # blrs_to_expand, and the inner loop iterating the matches at the current | |
| 359 # level, printing them, and adding any new subdirs that need expanding to | |
| 360 # blrs_to_expand (to be picked up in the next outer loop iteration). | |
| 361 blrs_to_expand = [BucketListingRef(uri)] | |
| 362 num_objs = 0 | |
| 363 num_bytes = 0 | |
| 364 expanding_top_level = True | |
| 365 printed_one = False | |
| 366 num_expanded_blrs = 0 | |
| 367 while len(blrs_to_expand): | |
| 368 if printed_one: | |
| 369 print | |
| 370 blr = blrs_to_expand.pop(0) | |
| 371 if blr.HasKey(): | |
| 372 blr_iterator = iter([blr]) | |
| 373 elif blr.HasPrefix(): | |
| 374 # Bucket subdir from a previous iteration. Print "header" line only if | |
| 375 # we're listing more than one subdir (or if it's a recursive listing), | |
| 376 # to be consistent with the way UNIX ls works. | |
| 377 if num_expanded_blrs > 1 or should_recurse: | |
| 378 print '%s:' % blr.GetUriString().encode('utf-8') | |
| 379 printed_one = True | |
| 380 blr_iterator = self.WildcardIterator('%s/*' % | |
| 381 blr.GetRStrippedUriString(), | |
| 382 all_versions=self.all_versions) | |
| 383 elif blr.NamesBucket(): | |
| 384 blr_iterator = self.WildcardIterator('%s*' % blr.GetUriString(), | |
| 385 all_versions=self.all_versions) | |
| 386 else: | |
| 387 # This BLR didn't come from a bucket listing. This case happens for | |
| 388 # BLR's instantiated from a user-provided URI. | |
| 389 blr_iterator = PluralityCheckableIterator( | |
| 390 UriOnlyBlrExpansionIterator( | |
| 391 self, blr, all_versions=self.all_versions)) | |
| 392 if blr_iterator.is_empty() and not ContainsWildcard(uri): | |
| 393 raise CommandException('No such object %s' % uri) | |
| 394 for cur_blr in blr_iterator: | |
| 395 num_expanded_blrs = num_expanded_blrs + 1 | |
| 396 if cur_blr.HasKey(): | |
| 397 # Object listing. | |
| 398 (no, nb) = self._PrintInfoAboutBucketListingRef( | |
| 399 cur_blr, listing_style) | |
| 400 num_objs += no | |
| 401 num_bytes += nb | |
| 402 printed_one = True | |
| 403 else: | |
| 404 # Subdir listing. If we're at the top level of a bucket subdir | |
| 405 # listing don't print the list here (corresponding to how UNIX ls | |
| 406 # dir just prints its contents, not the name followed by its | |
| 407 # contents). | |
| 408 if (expanding_top_level and not uri.names_bucket()) or should_recurse: | |
| 409 if cur_blr.GetUriString().endswith('//'): | |
| 410 # Expand gs://bucket// into gs://bucket//* so we don't infinite | |
| 411 # loop. This case happens when user has uploaded an object whose | |
| 412 # name begins with a /. | |
| 413 cur_blr = BucketListingRef(self.suri_builder.StorageUri( | |
| 414 '%s*' % cur_blr.GetUriString()), None, None, cur_blr.headers) | |
| 415 blrs_to_expand.append(cur_blr) | |
| 416 # Don't include the subdir name in the output if we're doing a | |
| 417 # recursive listing, as it will be printed as 'subdir:' when we get | |
| 418 # to the prefix expansion, the next iteration of the main loop. | |
| 419 else: | |
| 420 if listing_style == ListingStyle.LONG: | |
| 421 print '%-33s%s' % ( | |
| 422 '', cur_blr.GetUriString().encode('utf-8')) | |
| 423 else: | |
| 424 print cur_blr.GetUriString().encode('utf-8') | |
| 425 expanding_top_level = False | |
| 426 return (num_objs, num_bytes) | 338 return (num_objs, num_bytes) |
| 427 | 339 |
| 428 # Command entry point. | |
| 429 def RunCommand(self): | 340 def RunCommand(self): |
| 341 """Command entry point for the ls command.""" |
| 430 got_nomatch_errors = False | 342 got_nomatch_errors = False |
| 343 got_bucket_nomatch_errors = False |
| 431 listing_style = ListingStyle.SHORT | 344 listing_style = ListingStyle.SHORT |
| 432 get_bucket_info = False | 345 get_bucket_info = False |
| 433 self.recursion_requested = False | 346 self.recursion_requested = False |
| 434 self.all_versions = False | 347 self.all_versions = False |
| 435 self.include_etag = False | 348 self.include_etag = False |
| 436 self.human_readable = False | 349 self.human_readable = False |
| 437 if self.sub_opts: | 350 if self.sub_opts: |
| 438 for o, a in self.sub_opts: | 351 for o, a in self.sub_opts: |
| 439 if o == '-a': | 352 if o == '-a': |
| 440 self.all_versions = True | 353 self.all_versions = True |
| 441 elif o == '-e': | 354 elif o == '-e': |
| 442 self.include_etag = True | 355 self.include_etag = True |
| 443 elif o == '-b': | 356 elif o == '-b': |
| 444 get_bucket_info = True | 357 get_bucket_info = True |
| 445 elif o == '-h': | 358 elif o == '-h': |
| 446 self.human_readable = True | 359 self.human_readable = True |
| 447 elif o == '-l': | 360 elif o == '-l': |
| 448 listing_style = ListingStyle.LONG | 361 listing_style = ListingStyle.LONG |
| 449 elif o == '-L': | 362 elif o == '-L': |
| 450 listing_style = ListingStyle.LONG_LONG | 363 listing_style = ListingStyle.LONG_LONG |
| 451 elif o == '-p': | 364 elif o == '-p': |
| 452 self.proj_id_handler.SetProjectId(a) | 365 self.project_id = a |
| 453 elif o == '-r' or o == '-R': | 366 elif o == '-r' or o == '-R': |
| 454 self.recursion_requested = True | 367 self.recursion_requested = True |
| 455 | 368 |
| 456 if not self.args: | 369 if not self.args: |
| 457 # default to listing all gs buckets | 370 # default to listing all gs buckets |
| 458 self.args = ['gs://'] | 371 self.args = ['gs://'] |
| 459 | 372 |
| 460 total_objs = 0 | 373 total_objs = 0 |
| 461 total_bytes = 0 | 374 total_bytes = 0 |
| 462 for uri_str in self.args: | |
| 463 uri = self.suri_builder.StorageUri(uri_str) | |
| 464 self.proj_id_handler.FillInProjectHeaderIfNeeded('ls', uri, self.headers) | |
| 465 | 375 |
| 466 if uri.names_provider(): | 376 def MaybePrintBucketHeader(blr): |
| 467 # Provider URI: use bucket wildcard to list buckets. | 377 if len(self.args) > 1: |
| 468 for uri in self.WildcardIterator('%s://*' % uri.scheme).IterUris(): | 378 print '%s:' % blr.url_string.encode(UTF8) |
| 469 self._PrintBucketInfo(uri, listing_style) | 379 print_bucket_header = MaybePrintBucketHeader |
| 470 elif uri.names_bucket(): | |
| 471 # Bucket URI -> list the object(s) in that bucket. | |
| 472 if get_bucket_info: | |
| 473 # ls -b bucket listing request: List info about bucket(s). | |
| 474 | 380 |
| 475 if (listing_style != ListingStyle.LONG_LONG and | 381 for url_str in self.args: |
| 476 not ContainsWildcard(uri)): | 382 storage_url = StorageUrlFromString(url_str) |
| 477 # At this point, we haven't done any validation that the bucket URI | 383 if storage_url.IsFileUrl(): |
| 478 # actually exists. If the listing style is short, the | 384 raise CommandException('Only cloud URLs are supported for %s' |
| 479 # _PrintBucketInfo doesn't do any RPCs, so check to make sure the | 385 % self.command_name) |
| 480 # bucket actually exists by fetching it. | 386 bucket_fields = None |
| 481 uri.get_bucket(validate=True) | 387 if (listing_style == ListingStyle.SHORT or |
| 388 listing_style == ListingStyle.LONG): |
| 389 bucket_fields = ['id'] |
| 390 elif listing_style == ListingStyle.LONG_LONG: |
| 391 bucket_fields = ['location', 'storageClass', 'versioning', 'acl', |
| 392 'defaultObjectAcl', 'website', 'logging', 'cors', |
| 393 'lifecycle'] |
| 394 if storage_url.IsProvider(): |
| 395 # Provider URL: use bucket wildcard to list buckets. |
| 396 for blr in self.WildcardIterator( |
| 397 '%s://*' % storage_url.scheme).IterBuckets( |
| 398 bucket_fields=bucket_fields): |
| 399 self._PrintBucketInfo(blr, listing_style) |
| 400 elif storage_url.IsBucket() and get_bucket_info: |
| 401 # ls -b bucket listing request: List info about bucket(s). |
| 402 total_buckets = 0 |
| 403 for blr in self.WildcardIterator(url_str).IterBuckets( |
| 404 bucket_fields=bucket_fields): |
| 405 if not ContainsWildcard(url_str) and not blr.root_object: |
| 406 # Iterator does not make an HTTP call for non-wildcarded |
| 407 # listings with fields=='id'. Ensure the bucket exists by calling |
| 408 # GetBucket. |
| 409 self.gsutil_api.GetBucket( |
| 410 blr.storage_url.bucket_name, |
| 411 fields=['id'], provider=storage_url.scheme) |
| 412 self._PrintBucketInfo(blr, listing_style) |
| 413 total_buckets += 1 |
| 414 if not ContainsWildcard(url_str) and not total_buckets: |
| 415 got_bucket_nomatch_errors = True |
| 416 else: |
| 417 # URL names a bucket, object, or object subdir -> |
| 418 # list matching object(s) / subdirs. |
| 419 def _PrintPrefixLong(blr): |
| 420 print '%-33s%s' % ('', blr.url_string.encode(UTF8)) |
| 482 | 421 |
| 483 for uri in self.WildcardIterator(uri).IterUris(): | 422 if listing_style == ListingStyle.SHORT: |
| 484 self._PrintBucketInfo(uri, listing_style) | 423 # ls helper by default readies us for a short listing. |
| 424 ls_helper = LsHelper(self.WildcardIterator, self.logger, |
| 425 all_versions=self.all_versions, |
| 426 print_bucket_header_func=print_bucket_header, |
| 427 should_recurse=self.recursion_requested) |
| 428 elif listing_style == ListingStyle.LONG: |
| 429 bucket_listing_fields = ['name', 'updated', 'size'] |
| 430 if self.all_versions: |
| 431 bucket_listing_fields.extend(['generation', 'metageneration']) |
| 432 if self.include_etag: |
| 433 bucket_listing_fields.append('etag') |
| 434 |
| 435 ls_helper = LsHelper(self.WildcardIterator, self.logger, |
| 436 print_object_func=self._PrintLongListing, |
| 437 print_dir_func=_PrintPrefixLong, |
| 438 print_bucket_header_func=print_bucket_header, |
| 439 all_versions=self.all_versions, |
| 440 should_recurse=self.recursion_requested, |
| 441 fields=bucket_listing_fields) |
| 442 |
| 443 elif listing_style == ListingStyle.LONG_LONG: |
| 444 # List all fields |
| 445 bucket_listing_fields = None |
| 446 ls_helper = LsHelper(self.WildcardIterator, self.logger, |
| 447 print_object_func=PrintFullInfoAboutObject, |
| 448 print_dir_func=_PrintPrefixLong, |
| 449 print_bucket_header_func=print_bucket_header, |
| 450 all_versions=self.all_versions, |
| 451 should_recurse=self.recursion_requested, |
| 452 fields=bucket_listing_fields) |
| 485 else: | 453 else: |
| 486 # Not -b request: List objects in the bucket(s). | 454 raise CommandException('Unknown listing style: %s' % listing_style) |
| 487 (no, nb) = self._ExpandUriAndPrintInfo(uri, listing_style, | 455 |
| 488 should_recurse=self.recursion_requested) | 456 exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url) |
| 489 if no == 0 and ContainsWildcard(uri): | 457 if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0: |
| 490 got_nomatch_errors = True | |
| 491 total_objs += no | |
| 492 total_bytes += nb | |
| 493 else: | |
| 494 # URI names an object or object subdir -> list matching object(s) / | |
| 495 # subdirs. | |
| 496 (exp_objs, exp_bytes) = self._ExpandUriAndPrintInfo(uri, listing_style, | |
| 497 should_recurse=self.recursion_requested) | |
| 498 if exp_objs == 0 and ContainsWildcard(uri): | |
| 499 got_nomatch_errors = True | 458 got_nomatch_errors = True |
| 500 total_bytes += exp_bytes | 459 total_bytes += exp_bytes |
| 501 total_objs += exp_objs | 460 total_objs += exp_objs |
| 502 | 461 |
| 503 if total_objs and listing_style != ListingStyle.SHORT: | 462 if total_objs and listing_style != ListingStyle.SHORT: |
| 504 print ('TOTAL: %d objects, %d bytes (%s)' % | 463 print ('TOTAL: %d objects, %d bytes (%s)' % |
| 505 (total_objs, total_bytes, MakeHumanReadable(float(total_bytes)))) | 464 (total_objs, total_bytes, MakeHumanReadable(float(total_bytes)))) |
| 506 if got_nomatch_errors: | 465 if got_nomatch_errors: |
| 507 raise CommandException('One or more URIs matched no objects.') | 466 raise CommandException('One or more URLs matched no objects.') |
| 467 if got_bucket_nomatch_errors: |
| 468 raise NotFoundException('One or more bucket URLs matched no buckets.') |
| 508 | 469 |
| 509 return 0 | 470 return 0 |
| 510 | |
| 511 | |
| 512 class UriOnlyBlrExpansionIterator: | |
| 513 """ | |
| 514 Iterator that expands a BucketListingRef that contains only a URI (i.e., | |
| 515 didn't come from a bucket listing), yielding BucketListingRefs to which it | |
| 516 expands. This case happens for BLR's instantiated from a user-provided URI. | |
| 517 | |
| 518 Note that we can't use NameExpansionIterator here because it produces an | |
| 519 iteration over the full object names (e.g., expanding "gs://bucket" to | |
| 520 "gs://bucket/dir/o1" and "gs://bucket/dir/o2"), while for the ls command | |
| 521 we need also to see the intermediate directories (like "gs://bucket/dir"). | |
| 522 """ | |
| 523 def __init__(self, command_instance, blr, all_versions=False): | |
| 524 self.command_instance = command_instance | |
| 525 self.blr = blr | |
| 526 self.all_versions=all_versions | |
| 527 | |
| 528 def __iter__(self): | |
| 529 """ | |
| 530 Args: | |
| 531 command_instance: calling instance of Command class. | |
| 532 blr: BucketListingRef to expand. | |
| 533 | |
| 534 Yields: | |
| 535 List of BucketListingRef to which it expands. | |
| 536 """ | |
| 537 # Do a delimited wildcard expansion so we get any matches along with | |
| 538 # whether they are keys or prefixes. That way if bucket contains a key | |
| 539 # 'abcd' and another key 'abce/x.txt' the expansion will return two BLRs, | |
| 540 # the first with HasKey()=True and the second with HasPrefix()=True. | |
| 541 rstripped_versionless_uri_str = self.blr.GetRStrippedUriString() | |
| 542 if ContainsWildcard(rstripped_versionless_uri_str): | |
| 543 for blr in self.command_instance.WildcardIterator( | |
| 544 rstripped_versionless_uri_str, all_versions=self.all_versions): | |
| 545 yield blr | |
| 546 return | |
| 547 # Build a wildcard to expand so CloudWildcardIterator will not just treat it | |
| 548 # as a key and yield the result without doing a bucket listing. | |
| 549 for blr in self.command_instance.WildcardIterator( | |
| 550 rstripped_versionless_uri_str + '*', all_versions=self.all_versions): | |
| 551 # Find the originally specified BucketListingRef in the expanded list (if | |
| 552 # present). Don't just use the expanded list, because it would also | |
| 553 # include objects whose name prefix matches the blr name (because of the | |
| 554 # wildcard match we did above). Note that there can be multiple matches, | |
| 555 # for the case where there's both an object and a subdirectory with the | |
| 556 # same name. | |
| 557 if (blr.GetRStrippedUriString() | |
| 558 == rstripped_versionless_uri_str): | |
| 559 yield blr | |
| 560 | |
| 561 | |
| 562 def UriStrForObj(uri, obj, all_versions): | |
| 563 """Constructs a URI string for the given object. | |
| 564 | |
| 565 For example if we were iterating gs://*, obj could be an object in one | |
| 566 of the user's buckets enumerated by the ls command. | |
| 567 | |
| 568 Args: | |
| 569 uri: base StorageUri being iterated. | |
| 570 obj: object (Key) being listed. | |
| 571 all_versions: Whether or not to include versioning. | |
| 572 | |
| 573 Returns: | |
| 574 URI string. | |
| 575 """ | |
| 576 version_info = '' | |
| 577 if all_versions: | |
| 578 if uri.get_provider().name == 'google' and obj.generation: | |
| 579 version_info = '#%s' % obj.generation | |
| 580 elif uri.get_provider().name == 'aws' and obj.version_id: | |
| 581 if isinstance(obj, DeleteMarker): | |
| 582 version_info = '#<DeleteMarker>' + str(obj.version_id) | |
| 583 else: | |
| 584 version_info = '#' + str(obj.version_id) | |
| 585 else: | |
| 586 version_info = '' | |
| 587 return '%s://%s/%s%s' % (uri.scheme, obj.bucket.name, obj.name, | |
| 588 version_info) | |
| OLD | NEW |