tools/telemetry/third_party/gsutilz/gslib/storage_url.py - Issue 1493973002: Remove telemetry/third_party/gsutilz

Unified Diff: tools/telemetry/third_party/gsutilz/gslib/storage_url.py

Issue 1493973002: Remove telemetry/third_party/gsutilz (Closed). Base URL: https://chromium.googlesource.com/chromium/src.git@gsutil_changes

Patch Set: rebase (created 5 years ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « tools/telemetry/third_party/gsutilz/gslib/storage_uri_builder.py ('k') | tools/telemetry/third_party/gsutilz/gslib/tab_complete.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: tools/telemetry/third_party/gsutilz/gslib/storage_url.py

diff --git a/tools/telemetry/third_party/gsutilz/gslib/storage_url.py b/tools/telemetry/third_party/gsutilz/gslib/storage_url.py

deleted file mode 100644

index 657883cd71cadacde9c713bc628cfc217a89991a..0000000000000000000000000000000000000000

--- a/tools/telemetry/third_party/gsutilz/gslib/storage_url.py

+++ /dev/null

@@ -1,324 +0,0 @@

-# -*- coding: utf-8 -*-

-# Licensed under the Apache License, Version 2.0 (the "License");

-# you may not use this file except in compliance with the License.

-# You may obtain a copy of the License at

-# http://www.apache.org/licenses/LICENSE-2.0

-# Unless required by applicable law or agreed to in writing, software

-# distributed under the License is distributed on an "AS IS" BASIS,

-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

-# See the License for the specific language governing permissions and

-# limitations under the License.

-"""File and Cloud URL representation classes."""

-from __future__ import absolute_import

-import os

-import re

-from gslib.exception import InvalidUrlError

-# Matches provider strings of the form 'gs://'

-PROVIDER_REGEX = re.compile(r'(?P<provider>[^:]*)://$')

-# Matches bucket strings of the form 'gs://bucket'

-BUCKET_REGEX = re.compile(r'(?P<provider>[^:]*)://(?P<bucket>[^/]*)/{0,1}$')

-# Matches object strings of the form 'gs://bucket/obj'

-OBJECT_REGEX = re.compile(

- r'(?P<provider>[^:]*)://(?P<bucket>[^/]*)/(?P<object>.*)')

-# Matches versioned object strings of the form 'gs://bucket/obj#1234'

-GS_GENERATION_REGEX = re.compile(r'(?P<object>.+)#(?P<generation>[0-9]+)$')

-# Matches versioned object strings of the form 's3://bucket/obj#NULL'

-S3_VERSION_REGEX = re.compile(r'(?P<object>.+)#(?P<version_id>.+)$')

-# Matches file strings of the form 'file://dir/filename'

-FILE_OBJECT_REGEX = re.compile(r'([^:]*://)(?P<filepath>.*)')

-# Regex to disallow buckets violating charset or not [3..255] chars total.

-BUCKET_NAME_RE = re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9\._-]{1,253}[a-zA-Z0-9]$')

-# Regex to disallow buckets with individual DNS labels longer than 63.

-TOO_LONG_DNS_NAME_COMP = re.compile(r'[-_a-z0-9]{64}')

-# Regex to determine if a string contains any wildcards.

-WILDCARD_REGEX = re.compile(r'[*?\[\]]')

-class StorageUrl(object):

- """Abstract base class for file and Cloud Storage URLs."""

- def Clone(self):

- raise NotImplementedError('Clone not overridden')

- def IsFileUrl(self):

- raise NotImplementedError('IsFileUrl not overridden')

- def IsCloudUrl(self):

- raise NotImplementedError('IsCloudUrl not overridden')

- def IsStream(self):

- raise NotImplementedError('IsStream not overridden')

- def CreatePrefixUrl(self, wildcard_suffix=None):

- """Returns a prefix of this URL that can be used for iterating.

- Args:

- wildcard_suffix: If supplied, this wildcard suffix will be appended to the

- prefix with a trailing slash before being returned.

- Returns:

- A prefix of this URL that can be used for iterating.

- If this URL contains a trailing slash, it will be stripped to create the

- prefix. This helps avoid infinite looping when prefixes are iterated, but

- preserves other slashes so that objects with '/' in the name are handled

- properly.

- For example, when recursively listing a bucket with the following contents:

- gs://bucket// <-- object named slash

- gs://bucket//one-dir-deep

- a top-level expansion with '/' as a delimiter will result in the following

- URL strings:

- 'gs://bucket//' : OBJECT

- 'gs://bucket//' : PREFIX

- If we right-strip all slashes from the prefix entry and add a wildcard

- suffix, we will get 'gs://bucket/*' which will produce identical results

- (and infinitely recurse).

- Example return values:

- ('gs://bucket/subdir/', '*') becomes 'gs://bucket/subdir/*'

- ('gs://bucket/', '*') becomes 'gs://bucket/*'

- ('gs://bucket/', None) becomes 'gs://bucket'

- ('gs://bucket/subdir//', '*') becomes 'gs://bucket/subdir//*'

- ('gs://bucket/subdir///', '**') becomes 'gs://bucket/subdir///**'

- ('gs://bucket/subdir/', '*') where 'subdir/' is an object becomes

- 'gs://bucket/subdir/*', but iterating on this will return 'subdir/'

- as a BucketListingObject, so we will not recurse on it as a subdir

- during listing.

- """

- raise NotImplementedError('CreatePrefixUrl not overridden')

- @property

- def url_string(self):

- raise NotImplementedError('url_string not overridden')

- @property

- def versionless_url_string(self):

- raise NotImplementedError('versionless_url_string not overridden')

- def __eq__(self, other):

- return self.url_string == other.url_string

- def __hash__(self):

- return hash(self.url_string)

-class _FileUrl(StorageUrl):

- """File URL class providing parsing and convenience methods.

- This class assists with usage and manipulation of an

- (optionally wildcarded) file URL string. Depending on the string

- contents, this class represents one or more directories or files.

- For File URLs, scheme is always file, bucket_name is always blank,

- and object_name contains the file/directory path.

- """

- def __init__(self, url_string, is_stream=False):

- self.scheme = 'file'

- self.bucket_name = ''

- match = FILE_OBJECT_REGEX.match(url_string)

- if match and match.lastindex == 2:

- self.object_name = match.group(2)

- else:

- self.object_name = url_string

- self.generation = None

- self.is_stream = is_stream

- self.delim = os.sep

- def Clone(self):

- return _FileUrl(self.url_string)

- def IsFileUrl(self):

- return True

- def IsCloudUrl(self):

- return False

- def IsStream(self):

- return self.is_stream

- def IsDirectory(self):

- return not self.IsStream() and os.path.isdir(self.object_name)

- def CreatePrefixUrl(self, wildcard_suffix=None):

- return self.url_string

- @property

- def url_string(self):

- return '%s://%s' % (self.scheme, self.object_name)

- @property

- def versionless_url_string(self):

- return self.url_string

- def __str__(self):

- return self.url_string

-class _CloudUrl(StorageUrl):

- """Cloud URL class providing parsing and convenience methods.

- This class assists with usage and manipulation of an

- (optionally wildcarded) cloud URL string. Depending on the string

- contents, this class represents a provider, bucket(s), or object(s).

- This class operates only on strings. No cloud storage API calls are

- made from this class.

- """

- def __init__(self, url_string):

- self.scheme = None

- self.bucket_name = None

- self.object_name = None

- self.generation = None

- self.delim = '/'

- provider_match = PROVIDER_REGEX.match(url_string)

- bucket_match = BUCKET_REGEX.match(url_string)

- if provider_match:

- self.scheme = provider_match.group('provider')

- elif bucket_match:

- self.scheme = bucket_match.group('provider')

- self.bucket_name = bucket_match.group('bucket')

- if (not ContainsWildcard(self.bucket_name) and

- (not BUCKET_NAME_RE.match(self.bucket_name) or

- TOO_LONG_DNS_NAME_COMP.search(self.bucket_name))):

- raise InvalidUrlError('Invalid bucket name in URL "%s"' % url_string)

- else:

- object_match = OBJECT_REGEX.match(url_string)

- if object_match:

- self.scheme = object_match.group('provider')

- self.bucket_name = object_match.group('bucket')

- self.object_name = object_match.group('object')

- if self.scheme == 'gs':

- generation_match = GS_GENERATION_REGEX.match(self.object_name)

- if generation_match:

- self.object_name = generation_match.group('object')

- self.generation = generation_match.group('generation')

- elif self.scheme == 's3':

- version_match = S3_VERSION_REGEX.match(self.object_name)

- if version_match:

- self.object_name = version_match.group('object')

- self.generation = version_match.group('version_id')

- else:

- raise InvalidUrlError(

- 'CloudUrl: URL string %s did not match URL regex' % url_string)

- def Clone(self):

- return _CloudUrl(self.url_string)

- def IsFileUrl(self):

- return False

- def IsCloudUrl(self):

- return True

- def IsStream(self):

- raise NotImplementedError('IsStream not supported on CloudUrl')

- def IsBucket(self):

- return bool(self.bucket_name and not self.object_name)

- def IsObject(self):

- return bool(self.bucket_name and self.object_name)

- def HasGeneration(self):

- return bool(self.generation)

- def IsProvider(self):

- return bool(self.scheme and not self.bucket_name)

- def CreatePrefixUrl(self, wildcard_suffix=None):

- prefix = StripOneSlash(self.versionless_url_string)

- if wildcard_suffix:

- prefix = '%s/%s' % (prefix, wildcard_suffix)

- return prefix

- @property

- def bucket_url_string(self):

- return '%s://%s/' % (self.scheme, self.bucket_name)

- @property

- def url_string(self):

- url_str = self.versionless_url_string

- if self.HasGeneration():

- url_str += '#%s' % self.generation

- return url_str

- @property

- def versionless_url_string(self):

- if self.IsProvider():

- return '%s://' % self.scheme

- elif self.IsBucket():

- return self.bucket_url_string

- return '%s://%s/%s' % (self.scheme, self.bucket_name, self.object_name)

- def __str__(self):

- return self.url_string

-def _GetSchemeFromUrlString(url_str):

- """Returns scheme component of a URL string."""

- end_scheme_idx = url_str.find('://')

- if end_scheme_idx == -1:

- # File is the default scheme.

- return 'file'

- else:

- return url_str[0:end_scheme_idx].lower()

-def _GetPathFromUrlString(url_str):

- """Returns path component of a URL string."""

- end_scheme_idx = url_str.find('://')

- if end_scheme_idx == -1:

- return url_str

- else:

- return url_str[end_scheme_idx + 3:]

-def IsFileUrlString(url_str):

- """Returns whether a string is a file URL."""

- return _GetSchemeFromUrlString(url_str) == 'file'

-def StorageUrlFromString(url_str):

- """Static factory function for creating a StorageUrl from a string."""

- scheme = _GetSchemeFromUrlString(url_str)

- if scheme not in ('file', 's3', 'gs'):

- raise InvalidUrlError('Unrecognized scheme "%s"' % scheme)

- if scheme == 'file':

- path = _GetPathFromUrlString(url_str)

- is_stream = (path == '-')

- return _FileUrl(url_str, is_stream=is_stream)

- return _CloudUrl(url_str)

-def StripOneSlash(url_str):

- if url_str and url_str.endswith('/'):

- return url_str[:-1]

- return url_str

-def ContainsWildcard(url_string):

- """Checks whether url_string contains a wildcard.

- Args:

- url_string: URL string to check.

- Returns:

- bool indicator.

- """

- return bool(WILDCARD_REGEX.search(url_string))