| Index: gslib/bucket_listing_ref.py
|
| ===================================================================
|
| --- gslib/bucket_listing_ref.py (revision 33376)
|
| +++ gslib/bucket_listing_ref.py (working copy)
|
| @@ -1,175 +1,114 @@
|
| +# -*- coding: utf-8 -*-
|
| # Copyright 2012 Google Inc. All Rights Reserved.
|
| #
|
| -# Permission is hereby granted, free of charge, to any person obtaining a
|
| -# copy of this software and associated documentation files (the
|
| -# "Software"), to deal in the Software without restriction, including
|
| -# without limitation the rights to use, copy, modify, merge, publish, dis-
|
| -# tribute, sublicense, and/or sell copies of the Software, and to permit
|
| -# persons to whom the Software is furnished to do so, subject to the fol-
|
| -# lowing conditions:
|
| +# Licensed under the Apache License, Version 2.0 (the "License");
|
| +# you may not use this file except in compliance with the License.
|
| +# You may obtain a copy of the License at
|
| #
|
| -# The above copyright notice and this permission notice shall be included
|
| -# in all copies or substantial portions of the Software.
|
| +# http://www.apache.org/licenses/LICENSE-2.0
|
| #
|
| -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
| -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
|
| -# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
| -# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
| -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
| -# IN THE SOFTWARE.
|
| +# Unless required by applicable law or agreed to in writing, software
|
| +# distributed under the License is distributed on an "AS IS" BASIS,
|
| +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| +# See the License for the specific language governing permissions and
|
| +# limitations under the License.
|
| +"""Classes for cloud/file references yielded by gsutil iterators."""
|
|
|
| -import time
|
| +from __future__ import absolute_import
|
|
|
|
|
| class BucketListingRef(object):
|
| - """
|
| - Container that holds a reference to one result from a bucket listing, allowing
|
| - polymorphic iteration over wildcard-iterated URIs, Keys, or Prefixes. At a
|
| - minimum, every reference contains a StorageUri. If the reference came from a
|
| - bucket listing (as opposed to a manually instantiated ref that might populate
|
| - only the StorageUri), it will additionally contain either a Key or a Prefix,
|
| - depending on whether it was a reference to an object or was just a prefix of a
|
| - path (i.e., bucket subdirectory). The latter happens when the bucket was
|
| - listed using delimiter='/'.
|
| + """Base class for a reference to one fully expanded iterator result.
|
|
|
| - Note that Keys are shallow-populated, based on the contents extracted from
|
| - parsing a bucket listing. This includes name, length, and other fields
|
| - (basically, the info listed by gsutil ls -l), but does not include information
|
| - like ACL and location (which require separate server requests, which is why
|
| - there's a separate gsutil ls -L option to get this more detailed info).
|
| - """
|
| + This allows polymorphic iteration over wildcard-iterated URLs. The
|
| + reference contains a fully expanded URL string containing no wildcards and
|
| + referring to exactly one entity (any wildcard character it contains is
|
| + assumed to be literal text and should never be treated as a wildcard).
|
|
|
| - def __init__(self, uri, key=None, prefix=None, headers=None):
|
| - """Instantiate BucketListingRef from uri and (if available) key or prefix.
|
| + Each reference represents a Bucket, Object, or Prefix. For filesystem URLs,
|
| + Objects represent files and Prefixes represent directories.
|
|
|
| - Args:
|
| - uri: StorageUri for the object (required).
|
| - key: Key for the object, or None if not available.
|
| - prefix: Prefix for the subdir, or None if not available.
|
| - headers: Dictionary containing optional HTTP headers to pass to boto
|
| - (which happens when GetKey() is called on an BucketListingRef which
|
| - has no constructor-populated Key), or None if not available.
|
| + The root_object member contains the underlying object as it was retrieved.
|
| + It is populated by the calling iterator, which may request only certain
|
| + fields to reduce the number of server requests.
|
|
|
| - At most one of key and prefix can be populated.
|
| - """
|
| - assert key is None or prefix is None
|
| - self.uri = uri
|
| - self.key = key
|
| - self.prefix = prefix
|
| - self.headers = headers or {}
|
| + For filesystem URLs, root_object is not populated.
|
| + """
|
|
|
| - def GetUri(self):
|
| - """Get URI form of listed URI.
|
| + class _BucketListingRefType(object):
|
| + """Enum class for describing BucketListingRefs."""
|
| + BUCKET = 'bucket' # Cloud bucket
|
| + OBJECT = 'object' # Cloud object or filesystem file
|
| + PREFIX = 'prefix' # Cloud bucket subdir or filesystem directory
|
|
|
| - Returns:
|
| - StorageUri.
|
| - """
|
| - return self.uri
|
| + @property
|
| + def url_string(self):
|
| + return self._url_string
|
|
|
| - def GetUriString(self):
|
| - """Get string URI form of listed URI.
|
| + @property
|
| + def type_name(self):
|
| + return self._ref_type
|
|
|
| - Returns:
|
| - String.
|
| - """
|
| - return self.uri.uri
|
| + def IsBucket(self):
|
| + return self._ref_type == self._BucketListingRefType.BUCKET
|
|
|
| - def NamesBucket(self):
|
| - """Determines if this BucketListingRef names a bucket.
|
| + def IsObject(self):
|
| + return self._ref_type == self._BucketListingRefType.OBJECT
|
|
|
| - Returns:
|
| - bool indicator.
|
| - """
|
| - return self.key is None and self.prefix is None and self.uri.names_bucket()
|
| + def IsPrefix(self):
|
| + return self._ref_type == self._BucketListingRefType.PREFIX
|
|
|
| - def IsLatest(self):
|
| - """Determines if this BucketListingRef names the latest version of an
|
| - object.
|
| + def __str__(self):
|
| + return self._url_string
|
|
|
| - Returns:
|
| - bool indicator.
|
| - """
|
| - return hasattr(self.uri, 'is_latest') and self.uri.is_latest
|
|
|
| - def GetRStrippedUriString(self):
|
| - """Get string URI form of listed URI, stripped of any right trailing
|
| - delims, and without version string.
|
| +class BucketListingBucket(BucketListingRef):
|
| + """BucketListingRef subclass for buckets."""
|
|
|
| - Returns:
|
| - String.
|
| + def __init__(self, storage_url, root_object=None):
|
| + """Creates a BucketListingRef of type bucket.
|
| +
|
| + Args:
|
| + storage_url: StorageUrl containing a bucket.
|
| + root_object: Underlying object metadata, if available.
|
| """
|
| - return self.uri.versionless_uri.rstrip('/')
|
| + super(BucketListingBucket, self).__init__()
|
| + self._ref_type = self._BucketListingRefType.BUCKET
|
| + self._url_string = storage_url.url_string
|
| + self.storage_url = storage_url
|
| + self.root_object = root_object
|
|
|
| - def HasKey(self):
|
| - """Return bool indicator of whether this BucketListingRef has a Key."""
|
| - return bool(self.key)
|
|
|
| - def HasPrefix(self):
|
| - """Return bool indicator of whether this BucketListingRef has a Prefix."""
|
| - return bool(self.prefix)
|
| +class BucketListingPrefix(BucketListingRef):
|
| + """BucketListingRef subclass for prefixes."""
|
|
|
| - def GetKey(self):
|
| - """Get Key form of listed URI.
|
| + def __init__(self, storage_url, root_object=None):
|
| + """Creates a BucketListingRef of type prefix.
|
|
|
| - Returns:
|
| - Subclass of boto.s3.key.Key.
|
| -
|
| - Raises:
|
| - BucketListingRefException: for bucket-only uri.
|
| + Args:
|
| + storage_url: StorageUrl containing a prefix.
|
| + root_object: Underlying object metadata, if available.
|
| """
|
| - # For gsutil ls -l gs://bucket self.key will be populated from (boto)
|
| - # parsing the bucket listing. But as noted and handled below there are
|
| - # cases where self.key isn't populated.
|
| - if not self.key:
|
| - if not self.uri.names_object():
|
| - raise BucketListingRefException(
|
| - 'Attempt to call GetKey() on Key-less BucketListingRef (uri=%s) ' %
|
| - self.uri)
|
| - # This case happens when we do gsutil ls -l on a object name-ful
|
| - # StorageUri with no object-name wildcard. Since the ls command
|
| - # implementation only reads bucket info we need to read the object
|
| - # for this case.
|
| - self.key = self.uri.get_key(validate=False, headers=self.headers)
|
| - # When we retrieve the object this way its last_modified timestamp
|
| - # is formatted in RFC 1123 format, which is different from when we
|
| - # retrieve from the bucket listing (which uses ISO 8601 format), so
|
| - # convert so we consistently return ISO 8601 format.
|
| - tuple_time = (time.strptime(self.key.last_modified,
|
| - '%a, %d %b %Y %H:%M:%S %Z'))
|
| - self.key.last_modified = time.strftime('%Y-%m-%dT%H:%M:%S', tuple_time)
|
| - return self.key
|
| + super(BucketListingPrefix, self).__init__()
|
| + self._ref_type = self._BucketListingRefType.PREFIX
|
| + self._url_string = storage_url.url_string
|
| + self.storage_url = storage_url
|
| + self.root_object = root_object
|
|
|
| - def GetPrefix(self):
|
| - """Get Prefix form of listed URI.
|
|
|
| - Returns:
|
| - boto.s3.prefix.Prefix.
|
| +class BucketListingObject(BucketListingRef):
|
| + """BucketListingRef subclass for objects."""
|
|
|
| - Raises:
|
| - BucketListingRefException: if this object has no Prefix.
|
| + def __init__(self, storage_url, root_object=None):
|
| + """Creates a BucketListingRef of type object.
|
| +
|
| + Args:
|
| + storage_url: StorageUrl containing an object.
|
| + root_object: Underlying object metadata, if available.
|
| """
|
| - if not self.prefix:
|
| - raise BucketListingRefException(
|
| - 'Attempt to call GetPrefix() on Prefix-less BucketListingRef '
|
| - '(uri=%s)' % self.uri)
|
| - return self.prefix
|
| + super(BucketListingObject, self).__init__()
|
| + self._ref_type = self._BucketListingRefType.OBJECT
|
| + self._url_string = storage_url.url_string
|
| + self.storage_url = storage_url
|
| + self.root_object = root_object
|
|
|
| - def __repr__(self):
|
| - """Returns string representation of BucketListingRef."""
|
| - return 'BucketListingRef(%s, HasKey=%s, HasPrefix=%s)' % (
|
| - self.uri, self.HasKey(), self.HasPrefix())
|
| -
|
| -
|
| -class BucketListingRefException(StandardError):
|
| - """Exception thrown for invalid BucketListingRef requests."""
|
| -
|
| - def __init__(self, reason):
|
| - StandardError.__init__(self)
|
| - self.reason = reason
|
| -
|
| - def __repr__(self):
|
| - return 'BucketListingRefException: %s' % self.reason
|
| -
|
| - def __str__(self):
|
| - return 'BucketListingRefException: %s' % self.reason
|
|
|