| Index: tools/telemetry/third_party/gsutilz/gslib/storage_url.py
|
| diff --git a/tools/telemetry/third_party/gsutilz/gslib/storage_url.py b/tools/telemetry/third_party/gsutilz/gslib/storage_url.py
|
| deleted file mode 100644
|
| index 657883cd71cadacde9c713bc628cfc217a89991a..0000000000000000000000000000000000000000
|
| --- a/tools/telemetry/third_party/gsutilz/gslib/storage_url.py
|
| +++ /dev/null
|
| @@ -1,324 +0,0 @@
|
| -# -*- coding: utf-8 -*-
|
| -# Copyright 2013 Google Inc. All Rights Reserved.
|
| -#
|
| -# Licensed under the Apache License, Version 2.0 (the "License");
|
| -# you may not use this file except in compliance with the License.
|
| -# You may obtain a copy of the License at
|
| -#
|
| -# http://www.apache.org/licenses/LICENSE-2.0
|
| -#
|
| -# Unless required by applicable law or agreed to in writing, software
|
| -# distributed under the License is distributed on an "AS IS" BASIS,
|
| -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| -# See the License for the specific language governing permissions and
|
| -# limitations under the License.
|
| -"""File and Cloud URL representation classes."""
|
| -
|
| -from __future__ import absolute_import
|
| -
|
| -import os
|
| -import re
|
| -
|
| -from gslib.exception import InvalidUrlError
|
| -
|
| -# Matches provider strings of the form 'gs://'
|
| -PROVIDER_REGEX = re.compile(r'(?P<provider>[^:]*)://$')
|
| -# Matches bucket strings of the form 'gs://bucket'
|
| -BUCKET_REGEX = re.compile(r'(?P<provider>[^:]*)://(?P<bucket>[^/]*)/{0,1}$')
|
| -# Matches object strings of the form 'gs://bucket/obj'
|
| -OBJECT_REGEX = re.compile(
|
| - r'(?P<provider>[^:]*)://(?P<bucket>[^/]*)/(?P<object>.*)')
|
| -# Matches versioned object strings of the form 'gs://bucket/obj#1234'
|
| -GS_GENERATION_REGEX = re.compile(r'(?P<object>.+)#(?P<generation>[0-9]+)$')
|
| -# Matches versioned object strings of the form 's3://bucket/obj#NULL'
|
| -S3_VERSION_REGEX = re.compile(r'(?P<object>.+)#(?P<version_id>.+)$')
|
| -# Matches file strings of the form 'file://dir/filename'
|
| -FILE_OBJECT_REGEX = re.compile(r'([^:]*://)(?P<filepath>.*)')
|
| -# Regex to disallow buckets violating charset or not [3..255] chars total.
|
| -BUCKET_NAME_RE = re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9\._-]{1,253}[a-zA-Z0-9]$')
|
| -# Regex to disallow buckets with individual DNS labels longer than 63.
|
| -TOO_LONG_DNS_NAME_COMP = re.compile(r'[-_a-z0-9]{64}')
|
| -# Regex to determine if a string contains any wildcards.
|
| -WILDCARD_REGEX = re.compile(r'[*?\[\]]')
|
| -
|
| -
|
| -class StorageUrl(object):
|
| - """Abstract base class for file and Cloud Storage URLs."""
|
| -
|
| - def Clone(self):
|
| - raise NotImplementedError('Clone not overridden')
|
| -
|
| - def IsFileUrl(self):
|
| - raise NotImplementedError('IsFileUrl not overridden')
|
| -
|
| - def IsCloudUrl(self):
|
| - raise NotImplementedError('IsCloudUrl not overridden')
|
| -
|
| - def IsStream(self):
|
| - raise NotImplementedError('IsStream not overridden')
|
| -
|
| - def CreatePrefixUrl(self, wildcard_suffix=None):
|
| - """Returns a prefix of this URL that can be used for iterating.
|
| -
|
| - Args:
|
| - wildcard_suffix: If supplied, this wildcard suffix will be appended to the
|
| - prefix with a trailing slash before being returned.
|
| -
|
| - Returns:
|
| - A prefix of this URL that can be used for iterating.
|
| -
|
| - If this URL contains a trailing slash, it will be stripped to create the
|
| - prefix. This helps avoid infinite looping when prefixes are iterated, but
|
| - preserves other slashes so that objects with '/' in the name are handled
|
| - properly.
|
| -
|
| - For example, when recursively listing a bucket with the following contents:
|
| - gs://bucket// <-- object named slash
|
| - gs://bucket//one-dir-deep
|
| - a top-level expansion with '/' as a delimiter will result in the following
|
| - URL strings:
|
| - 'gs://bucket//' : OBJECT
|
| - 'gs://bucket//' : PREFIX
|
| - If we right-strip all slashes from the prefix entry and add a wildcard
|
| - suffix, we will get 'gs://bucket/*' which will produce identical results
|
| - (and infinitely recurse).
|
| -
|
| - Example return values:
|
| - ('gs://bucket/subdir/', '*') becomes 'gs://bucket/subdir/*'
|
| - ('gs://bucket/', '*') becomes 'gs://bucket/*'
|
| - ('gs://bucket/', None) becomes 'gs://bucket'
|
| - ('gs://bucket/subdir//', '*') becomes 'gs://bucket/subdir//*'
|
| - ('gs://bucket/subdir///', '**') becomes 'gs://bucket/subdir///**'
|
| - ('gs://bucket/subdir/', '*') where 'subdir/' is an object becomes
|
| - 'gs://bucket/subdir/*', but iterating on this will return 'subdir/'
|
| - as a BucketListingObject, so we will not recurse on it as a subdir
|
| - during listing.
|
| - """
|
| - raise NotImplementedError('CreatePrefixUrl not overridden')
|
| -
|
| - @property
|
| - def url_string(self):
|
| - raise NotImplementedError('url_string not overridden')
|
| -
|
| - @property
|
| - def versionless_url_string(self):
|
| - raise NotImplementedError('versionless_url_string not overridden')
|
| -
|
| - def __eq__(self, other):
|
| - return self.url_string == other.url_string
|
| -
|
| - def __hash__(self):
|
| - return hash(self.url_string)
|
| -
|
| -
|
| -class _FileUrl(StorageUrl):
|
| - """File URL class providing parsing and convenience methods.
|
| -
|
| - This class assists with usage and manipulation of an
|
| - (optionally wildcarded) file URL string. Depending on the string
|
| - contents, this class represents one or more directories or files.
|
| -
|
| - For File URLs, scheme is always file, bucket_name is always blank,
|
| - and object_name contains the file/directory path.
|
| - """
|
| -
|
| - def __init__(self, url_string, is_stream=False):
|
| - self.scheme = 'file'
|
| - self.bucket_name = ''
|
| - match = FILE_OBJECT_REGEX.match(url_string)
|
| - if match and match.lastindex == 2:
|
| - self.object_name = match.group(2)
|
| - else:
|
| - self.object_name = url_string
|
| - self.generation = None
|
| - self.is_stream = is_stream
|
| - self.delim = os.sep
|
| -
|
| - def Clone(self):
|
| - return _FileUrl(self.url_string)
|
| -
|
| - def IsFileUrl(self):
|
| - return True
|
| -
|
| - def IsCloudUrl(self):
|
| - return False
|
| -
|
| - def IsStream(self):
|
| - return self.is_stream
|
| -
|
| - def IsDirectory(self):
|
| - return not self.IsStream() and os.path.isdir(self.object_name)
|
| -
|
| - def CreatePrefixUrl(self, wildcard_suffix=None):
|
| - return self.url_string
|
| -
|
| - @property
|
| - def url_string(self):
|
| - return '%s://%s' % (self.scheme, self.object_name)
|
| -
|
| - @property
|
| - def versionless_url_string(self):
|
| - return self.url_string
|
| -
|
| - def __str__(self):
|
| - return self.url_string
|
| -
|
| -
|
| -class _CloudUrl(StorageUrl):
|
| - """Cloud URL class providing parsing and convenience methods.
|
| -
|
| - This class assists with usage and manipulation of an
|
| - (optionally wildcarded) cloud URL string. Depending on the string
|
| - contents, this class represents a provider, bucket(s), or object(s).
|
| -
|
| - This class operates only on strings. No cloud storage API calls are
|
| - made from this class.
|
| - """
|
| -
|
| - def __init__(self, url_string):
|
| - self.scheme = None
|
| - self.bucket_name = None
|
| - self.object_name = None
|
| - self.generation = None
|
| - self.delim = '/'
|
| - provider_match = PROVIDER_REGEX.match(url_string)
|
| - bucket_match = BUCKET_REGEX.match(url_string)
|
| - if provider_match:
|
| - self.scheme = provider_match.group('provider')
|
| - elif bucket_match:
|
| - self.scheme = bucket_match.group('provider')
|
| - self.bucket_name = bucket_match.group('bucket')
|
| - if (not ContainsWildcard(self.bucket_name) and
|
| - (not BUCKET_NAME_RE.match(self.bucket_name) or
|
| - TOO_LONG_DNS_NAME_COMP.search(self.bucket_name))):
|
| - raise InvalidUrlError('Invalid bucket name in URL "%s"' % url_string)
|
| - else:
|
| - object_match = OBJECT_REGEX.match(url_string)
|
| - if object_match:
|
| - self.scheme = object_match.group('provider')
|
| - self.bucket_name = object_match.group('bucket')
|
| - self.object_name = object_match.group('object')
|
| - if self.scheme == 'gs':
|
| - generation_match = GS_GENERATION_REGEX.match(self.object_name)
|
| - if generation_match:
|
| - self.object_name = generation_match.group('object')
|
| - self.generation = generation_match.group('generation')
|
| - elif self.scheme == 's3':
|
| - version_match = S3_VERSION_REGEX.match(self.object_name)
|
| - if version_match:
|
| - self.object_name = version_match.group('object')
|
| - self.generation = version_match.group('version_id')
|
| - else:
|
| - raise InvalidUrlError(
|
| - 'CloudUrl: URL string %s did not match URL regex' % url_string)
|
| -
|
| - def Clone(self):
|
| - return _CloudUrl(self.url_string)
|
| -
|
| - def IsFileUrl(self):
|
| - return False
|
| -
|
| - def IsCloudUrl(self):
|
| - return True
|
| -
|
| - def IsStream(self):
|
| - raise NotImplementedError('IsStream not supported on CloudUrl')
|
| -
|
| - def IsBucket(self):
|
| - return bool(self.bucket_name and not self.object_name)
|
| -
|
| - def IsObject(self):
|
| - return bool(self.bucket_name and self.object_name)
|
| -
|
| - def HasGeneration(self):
|
| - return bool(self.generation)
|
| -
|
| - def IsProvider(self):
|
| - return bool(self.scheme and not self.bucket_name)
|
| -
|
| - def CreatePrefixUrl(self, wildcard_suffix=None):
|
| - prefix = StripOneSlash(self.versionless_url_string)
|
| - if wildcard_suffix:
|
| - prefix = '%s/%s' % (prefix, wildcard_suffix)
|
| - return prefix
|
| -
|
| - @property
|
| - def bucket_url_string(self):
|
| - return '%s://%s/' % (self.scheme, self.bucket_name)
|
| -
|
| - @property
|
| - def url_string(self):
|
| - url_str = self.versionless_url_string
|
| - if self.HasGeneration():
|
| - url_str += '#%s' % self.generation
|
| - return url_str
|
| -
|
| - @property
|
| - def versionless_url_string(self):
|
| - if self.IsProvider():
|
| - return '%s://' % self.scheme
|
| - elif self.IsBucket():
|
| - return self.bucket_url_string
|
| - return '%s://%s/%s' % (self.scheme, self.bucket_name, self.object_name)
|
| -
|
| - def __str__(self):
|
| - return self.url_string
|
| -
|
| -
|
| -def _GetSchemeFromUrlString(url_str):
|
| - """Returns scheme component of a URL string."""
|
| -
|
| - end_scheme_idx = url_str.find('://')
|
| - if end_scheme_idx == -1:
|
| - # File is the default scheme.
|
| - return 'file'
|
| - else:
|
| - return url_str[0:end_scheme_idx].lower()
|
| -
|
| -
|
| -def _GetPathFromUrlString(url_str):
|
| - """Returns path component of a URL string."""
|
| -
|
| - end_scheme_idx = url_str.find('://')
|
| - if end_scheme_idx == -1:
|
| - return url_str
|
| - else:
|
| - return url_str[end_scheme_idx + 3:]
|
| -
|
| -
|
| -def IsFileUrlString(url_str):
|
| - """Returns whether a string is a file URL."""
|
| -
|
| - return _GetSchemeFromUrlString(url_str) == 'file'
|
| -
|
| -
|
| -def StorageUrlFromString(url_str):
|
| - """Static factory function for creating a StorageUrl from a string."""
|
| -
|
| - scheme = _GetSchemeFromUrlString(url_str)
|
| -
|
| - if scheme not in ('file', 's3', 'gs'):
|
| - raise InvalidUrlError('Unrecognized scheme "%s"' % scheme)
|
| - if scheme == 'file':
|
| - path = _GetPathFromUrlString(url_str)
|
| - is_stream = (path == '-')
|
| - return _FileUrl(url_str, is_stream=is_stream)
|
| - return _CloudUrl(url_str)
|
| -
|
| -
|
| -def StripOneSlash(url_str):
|
| - if url_str and url_str.endswith('/'):
|
| - return url_str[:-1]
|
| - return url_str
|
| -
|
| -
|
| -def ContainsWildcard(url_string):
|
| - """Checks whether url_string contains a wildcard.
|
| -
|
| - Args:
|
| - url_string: URL string to check.
|
| -
|
| - Returns:
|
| - bool indicator.
|
| - """
|
| - return bool(WILDCARD_REGEX.search(url_string))
|
|
|