| Index: tools/telemetry/third_party/gsutilz/gslib/wildcard_iterator.py
|
| diff --git a/tools/telemetry/third_party/gsutilz/gslib/wildcard_iterator.py b/tools/telemetry/third_party/gsutilz/gslib/wildcard_iterator.py
|
| deleted file mode 100644
|
| index 57b4f638a4bfe7b788ec580b289cc011c315222f..0000000000000000000000000000000000000000
|
| --- a/tools/telemetry/third_party/gsutilz/gslib/wildcard_iterator.py
|
| +++ /dev/null
|
| @@ -1,657 +0,0 @@
|
| -# -*- coding: utf-8 -*-
|
| -# Copyright 2010 Google Inc. All Rights Reserved.
|
| -#
|
| -# Licensed under the Apache License, Version 2.0 (the "License");
|
| -# you may not use this file except in compliance with the License.
|
| -# You may obtain a copy of the License at
|
| -#
|
| -# http://www.apache.org/licenses/LICENSE-2.0
|
| -#
|
| -# Unless required by applicable law or agreed to in writing, software
|
| -# distributed under the License is distributed on an "AS IS" BASIS,
|
| -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| -# See the License for the specific language governing permissions and
|
| -# limitations under the License.
|
| -"""Wildcard iterator class and supporting functions."""
|
| -
|
| -from __future__ import absolute_import
|
| -
|
| -import fnmatch
|
| -import glob
|
| -import os
|
| -import re
|
| -import sys
|
| -import textwrap
|
| -
|
| -from gslib.bucket_listing_ref import BucketListingBucket
|
| -from gslib.bucket_listing_ref import BucketListingObject
|
| -from gslib.bucket_listing_ref import BucketListingPrefix
|
| -from gslib.cloud_api import AccessDeniedException
|
| -from gslib.cloud_api import CloudApi
|
| -from gslib.cloud_api import NotFoundException
|
| -from gslib.exception import CommandException
|
| -from gslib.storage_url import ContainsWildcard
|
| -from gslib.storage_url import StorageUrlFromString
|
| -from gslib.storage_url import StripOneSlash
|
| -from gslib.storage_url import WILDCARD_REGEX
|
| -from gslib.translation_helper import GenerationFromUrlAndString
|
| -from gslib.util import UTF8
|
| -
|
| -
|
| -FLAT_LIST_REGEX = re.compile(r'(?P<before>.*?)\*\*(?P<after>.*)')
|
| -
|
| -
|
| -class WildcardIterator(object):
|
| - """Class for iterating over Google Cloud Storage strings containing wildcards.
|
| -
|
| - The base class is abstract; you should instantiate using the
|
| - wildcard_iterator() static factory method, which chooses the right
|
| - implementation depending on the base string.
|
| - """
|
| -
|
| - # TODO: Standardize on __str__ and __repr__ here and elsewhere. Define both
|
| - # and make one return the other.
|
| - def __repr__(self):
|
| - """Returns string representation of WildcardIterator."""
|
| - return 'WildcardIterator(%s)' % self.wildcard_url.url_string
|
| -
|
| -
|
| -class CloudWildcardIterator(WildcardIterator):
|
| - """WildcardIterator subclass for buckets, bucket subdirs and objects.
|
| -
|
| - Iterates over BucketListingRef matching the Url string wildcard. It's
|
| - much more efficient to first get metadata that's available in the Bucket
|
| - (for example to get the name and size of each object), because that
|
| - information is available in the object list results.
|
| - """
|
| -
|
| - def __init__(self, wildcard_url, gsutil_api, all_versions=False,
|
| - debug=0, project_id=None):
|
| - """Instantiates an iterator that matches the wildcard URL.
|
| -
|
| - Args:
|
| - wildcard_url: CloudUrl that contains the wildcard to iterate.
|
| - gsutil_api: Cloud storage interface. Passed in for thread safety, also
|
| - settable for testing/mocking.
|
| - all_versions: If true, the iterator yields all versions of objects
|
| - matching the wildcard. If false, yields just the live
|
| - object version.
|
| - debug: Debug level to control debug output for iterator.
|
| - project_id: Project ID to use for bucket listings.
|
| - """
|
| - self.wildcard_url = wildcard_url
|
| - self.all_versions = all_versions
|
| - self.debug = debug
|
| - self.gsutil_api = gsutil_api
|
| - self.project_id = project_id
|
| -
|
| - def __iter__(self, bucket_listing_fields=None,
|
| - expand_top_level_buckets=False):
|
| - """Iterator that gets called when iterating over the cloud wildcard.
|
| -
|
| - In the case where no wildcard is present, returns a single matching object,
|
| - single matching prefix, or one of each if both exist.
|
| -
|
| - Args:
|
| - bucket_listing_fields: Iterable fields to include in bucket listings.
|
| - Ex. ['name', 'acl']. Iterator is
|
| - responsible for converting these to list-style
|
| - format ['items/name', 'items/acl'] as well as
|
| - adding any fields necessary for listing such as
|
| -                             prefixes. API implementation is responsible for
|
| - adding pagination fields. If this is None,
|
| - all fields are returned.
|
| - expand_top_level_buckets: If true, yield no BUCKET references. Instead,
|
| - expand buckets into top-level objects and
|
| - prefixes.
|
| -
|
| - Yields:
|
| - BucketListingRef of type BUCKET, OBJECT or PREFIX.
|
| - """
|
| - single_version_request = self.wildcard_url.HasGeneration()
|
| -
|
| - # For wildcard expansion purposes, we need at a minimum the name of
|
| - # each object and prefix. If we're not using the default of requesting
|
| - # all fields, make sure at least these are requested. The Cloud API
|
| - # tolerates specifying the same field twice.
|
| - get_fields = None
|
| - if bucket_listing_fields:
|
| - get_fields = set()
|
| - for field in bucket_listing_fields:
|
| - get_fields.add(field)
|
| - bucket_listing_fields = self._GetToListFields(
|
| - get_fields=bucket_listing_fields)
|
| - bucket_listing_fields.update(['items/name', 'prefixes'])
|
| - get_fields.update(['name'])
|
| - # If we're making versioned requests, ensure generation and
|
| - # metageneration are also included.
|
| - if single_version_request or self.all_versions:
|
| - bucket_listing_fields.update(['items/generation',
|
| - 'items/metageneration'])
|
| - get_fields.update(['generation', 'metageneration'])
|
| -
|
| - # Handle bucket wildcarding, if any, in _ExpandBucketWildcards. Then
|
| - # iterate over the expanded bucket strings and handle any object
|
| - # wildcarding.
|
| - for bucket_listing_ref in self._ExpandBucketWildcards(bucket_fields=['id']):
|
| - bucket_url_string = bucket_listing_ref.url_string
|
| - if self.wildcard_url.IsBucket():
|
| - # IsBucket() guarantees there are no prefix or object wildcards, and
|
| - # thus this is a top-level listing of buckets.
|
| - if expand_top_level_buckets:
|
| - url = StorageUrlFromString(bucket_url_string)
|
| - for obj_or_prefix in self.gsutil_api.ListObjects(
|
| - url.bucket_name, delimiter='/', all_versions=self.all_versions,
|
| - provider=self.wildcard_url.scheme,
|
| - fields=bucket_listing_fields):
|
| - if obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
|
| - yield self._GetObjectRef(bucket_url_string, obj_or_prefix.data,
|
| - with_version=self.all_versions)
|
| - else: # CloudApi.CsObjectOrPrefixType.PREFIX:
|
| - yield self._GetPrefixRef(bucket_url_string, obj_or_prefix.data)
|
| - else:
|
| - yield bucket_listing_ref
|
| - else:
|
| - # By default, assume a non-wildcarded URL is an object, not a prefix.
|
| - # This prevents unnecessary listings (which are slower, more expensive,
|
| - # and also subject to eventual consistency).
|
| - if (not ContainsWildcard(self.wildcard_url.url_string) and
|
| - self.wildcard_url.IsObject() and not self.all_versions):
|
| - try:
|
| - get_object = self.gsutil_api.GetObjectMetadata(
|
| - self.wildcard_url.bucket_name,
|
| - self.wildcard_url.object_name,
|
| - generation=self.wildcard_url.generation,
|
| - provider=self.wildcard_url.scheme,
|
| - fields=get_fields)
|
| - yield self._GetObjectRef(
|
| - self.wildcard_url.bucket_url_string, get_object,
|
| - with_version=(self.all_versions or single_version_request))
|
| - return
|
| - except (NotFoundException, AccessDeniedException):
|
| - # It's possible this is a prefix - try to list instead.
|
| - pass
|
| -
|
| - # Expand iteratively by building prefix/delimiter bucket listing
|
| - # request, filtering the results per the current level's wildcard
|
| - # (if present), and continuing with the next component of the
|
| - # wildcard. See _BuildBucketFilterStrings() documentation for details.
|
| - if single_version_request:
|
| - url_string = '%s%s#%s' % (bucket_url_string,
|
| - self.wildcard_url.object_name,
|
| - self.wildcard_url.generation)
|
| - else:
|
| - # Rstrip any prefixes to correspond with rstripped prefix wildcard
|
| - # from _BuildBucketFilterStrings().
|
| - url_string = '%s%s' % (bucket_url_string,
|
| - StripOneSlash(self.wildcard_url.object_name)
|
| - or '/') # Cover root object named '/' case.
|
| - urls_needing_expansion = [url_string]
|
| - while urls_needing_expansion:
|
| - url = StorageUrlFromString(urls_needing_expansion.pop(0))
|
| - (prefix, delimiter, prefix_wildcard, suffix_wildcard) = (
|
| - self._BuildBucketFilterStrings(url.object_name))
|
| - prog = re.compile(fnmatch.translate(prefix_wildcard))
|
| -
|
| - # List bucket for objects matching prefix up to delimiter.
|
| - for obj_or_prefix in self.gsutil_api.ListObjects(
|
| - url.bucket_name, prefix=prefix, delimiter=delimiter,
|
| - all_versions=self.all_versions or single_version_request,
|
| - provider=self.wildcard_url.scheme,
|
| - fields=bucket_listing_fields):
|
| - if obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
|
| - gcs_object = obj_or_prefix.data
|
| - if prog.match(gcs_object.name):
|
| - if not suffix_wildcard or (
|
| - StripOneSlash(gcs_object.name) == suffix_wildcard):
|
| - if not single_version_request or (
|
| - self._SingleVersionMatches(gcs_object.generation)):
|
| - yield self._GetObjectRef(
|
| - bucket_url_string, gcs_object, with_version=(
|
| - self.all_versions or single_version_request))
|
| - else: # CloudApi.CsObjectOrPrefixType.PREFIX
|
| - prefix = obj_or_prefix.data
|
| - # If the prefix ends with a slash, remove it. Note that we only
|
| - # remove one slash so that we can successfully enumerate dirs
|
| - # containing multiple slashes.
|
| - rstripped_prefix = StripOneSlash(prefix)
|
| - if prog.match(rstripped_prefix):
|
| - if suffix_wildcard and rstripped_prefix != suffix_wildcard:
|
| - # There's more wildcard left to expand.
|
| - url_append_string = '%s%s' % (
|
| - bucket_url_string, rstripped_prefix + '/' +
|
| - suffix_wildcard)
|
| - urls_needing_expansion.append(url_append_string)
|
| - else:
|
| - # No wildcard to expand, just yield the prefix
|
| - yield self._GetPrefixRef(bucket_url_string, prefix)
|
| -
|
| - def _BuildBucketFilterStrings(self, wildcard):
|
| - """Builds strings needed for querying a bucket and filtering results.
|
| -
|
| - This implements wildcard object name matching.
|
| -
|
| - Args:
|
| - wildcard: The wildcard string to match to objects.
|
| -
|
| - Returns:
|
| - (prefix, delimiter, prefix_wildcard, suffix_wildcard)
|
| - where:
|
| - prefix is the prefix to be sent in bucket GET request.
|
| - delimiter is the delimiter to be sent in bucket GET request.
|
| - prefix_wildcard is the wildcard to be used to filter bucket GET results.
|
| - suffix_wildcard is wildcard to be appended to filtered bucket GET
|
| - results for next wildcard expansion iteration.
|
| - For example, given the wildcard gs://bucket/abc/d*e/f*.txt we
|
| -      would build prefix=abc/d, delimiter=/, prefix_wildcard=d*e, and
|
| - suffix_wildcard=f*.txt. Using this prefix and delimiter for a bucket
|
| - listing request will then produce a listing result set that can be
|
| - filtered using this prefix_wildcard; and we'd use this suffix_wildcard
|
| - to feed into the next call(s) to _BuildBucketFilterStrings(), for the
|
| - next iteration of listing/filtering.
|
| -
|
| - Raises:
|
| - AssertionError if wildcard doesn't contain any wildcard chars.
|
| - """
|
| - # Generate a request prefix if the object name part of the wildcard starts
|
| - # with a non-wildcard string (e.g., that's true for 'gs://bucket/abc*xyz').
|
| - match = WILDCARD_REGEX.search(wildcard)
|
| - if not match:
|
| - # Input "wildcard" has no wildcard chars, so just return tuple that will
|
| - # cause a bucket listing to match the given input wildcard. Example: if
|
| - # previous iteration yielded gs://bucket/dir/ with suffix_wildcard abc,
|
| - # the next iteration will call _BuildBucketFilterStrings() with
|
| - # gs://bucket/dir/abc, and we will return prefix ='dir/abc',
|
| - # delimiter='/', prefix_wildcard='dir/abc', and suffix_wildcard=''.
|
| - prefix = wildcard
|
| - delimiter = '/'
|
| - prefix_wildcard = wildcard
|
| - suffix_wildcard = ''
|
| - else:
|
| - if match.start() > 0:
|
| - # Wildcard does not occur at beginning of object name, so construct a
|
| - # prefix string to send to server.
|
| - prefix = wildcard[:match.start()]
|
| - wildcard_part = wildcard[match.start():]
|
| - else:
|
| - prefix = None
|
| - wildcard_part = wildcard
|
| - end = wildcard_part.find('/')
|
| - if end != -1:
|
| - wildcard_part = wildcard_part[:end+1]
|
| - # Remove trailing '/' so we will match gs://bucket/abc* as well as
|
| - # gs://bucket/abc*/ with the same wildcard regex.
|
| - prefix_wildcard = StripOneSlash((prefix or '') + wildcard_part)
|
| - suffix_wildcard = wildcard[match.end():]
|
| - end = suffix_wildcard.find('/')
|
| - if end == -1:
|
| - suffix_wildcard = ''
|
| - else:
|
| - suffix_wildcard = suffix_wildcard[end+1:]
|
| -      # To implement recursive (**) wildcarding: if prefix_wildcard
|
| -      # contains '**', don't send a delimiter, and append suffix_wildcard
|
| -      # to the end of prefix_wildcard.
|
| - if prefix_wildcard.find('**') != -1:
|
| - delimiter = None
|
| - prefix_wildcard += suffix_wildcard
|
| - suffix_wildcard = ''
|
| - else:
|
| - delimiter = '/'
|
| - # The following debug output is useful for tracing how the algorithm
|
| - # walks through a multi-part wildcard like gs://bucket/abc/d*e/f*.txt
|
| - if self.debug > 1:
|
| - sys.stderr.write(
|
| - 'DEBUG: wildcard=%s, prefix=%s, delimiter=%s, '
|
| - 'prefix_wildcard=%s, suffix_wildcard=%s\n' %
|
| - (wildcard, prefix, delimiter, prefix_wildcard, suffix_wildcard))
|
| - return (prefix, delimiter, prefix_wildcard, suffix_wildcard)
|
| -
|
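| A minimal, self-contained sketch of the decomposition described in the
| docstring above; it approximates _BuildBucketFilterStrings() with plain
| re/fnmatch and an assumed stand-in for WILDCARD_REGEX, so it illustrates
| the algorithm rather than reproducing the gsutil helper verbatim:
|
|     import fnmatch
|     import re
|
|     # Assumed stand-in for gslib.storage_url.WILDCARD_REGEX.
|     WILDCARD_REGEX = re.compile(r'[*?\[\]]')
|
|     def build_filter_strings(wildcard):
|         match = WILDCARD_REGEX.search(wildcard)
|         if not match:
|             # No wildcard chars: list exactly this prefix.
|             return wildcard, '/', wildcard, ''
|         prefix = wildcard[:match.start()] or None
|         part = wildcard[match.start():]
|         slash = part.find('/')
|         if slash != -1:
|             part = part[:slash + 1]
|         # gsutil's StripOneSlash() removes at most one trailing '/'.
|         prefix_wildcard = ((prefix or '') + part).rstrip('/')
|         suffix_wildcard = wildcard[match.end():]
|         slash = suffix_wildcard.find('/')
|         suffix_wildcard = '' if slash == -1 else suffix_wildcard[slash + 1:]
|         if '**' in prefix_wildcard:
|             # Recursive wildcard: no delimiter, fold the suffix in.
|             return prefix, None, prefix_wildcard + suffix_wildcard, ''
|         return prefix, '/', prefix_wildcard, suffix_wildcard
|
|     # gs://bucket/abc/d*e/f*.txt -> prefix/delimiter for the ListObjects
|     # call, a prefix_wildcard for filtering, and a suffix for the next pass.
|     assert build_filter_strings('abc/d*e/f*.txt') == (
|         'abc/d', '/', 'abc/d*e', 'f*.txt')
|     assert re.compile(fnmatch.translate('abc/d*e')).match('abc/dome')
|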
| - def _SingleVersionMatches(self, listed_generation):
|
| - decoded_generation = GenerationFromUrlAndString(self.wildcard_url,
|
| - listed_generation)
|
| - return str(self.wildcard_url.generation) == str(decoded_generation)
|
| -
|
| - def _ExpandBucketWildcards(self, bucket_fields=None):
|
| - """Expands bucket and provider wildcards.
|
| -
|
| - Builds a list of bucket url strings that can be iterated on.
|
| -
|
| - Args:
|
| - bucket_fields: If present, populate only these metadata fields for
|
| - buckets. Example value: ['acl', 'defaultObjectAcl']
|
| -
|
| - Yields:
|
| -      BucketListingReferences of type BUCKET.
|
| - """
|
| - bucket_url = StorageUrlFromString(self.wildcard_url.bucket_url_string)
|
| - if (bucket_fields and set(bucket_fields) == set(['id']) and
|
| - not ContainsWildcard(self.wildcard_url.bucket_name)):
|
| - # If we just want the name of a non-wildcarded bucket URL,
|
| - # don't make an RPC.
|
| - yield BucketListingBucket(bucket_url)
|
| - elif(self.wildcard_url.IsBucket() and
|
| - not ContainsWildcard(self.wildcard_url.bucket_name)):
|
| - # If we have a non-wildcarded bucket URL, get just that bucket.
|
| - yield BucketListingBucket(
|
| - bucket_url, root_object=self.gsutil_api.GetBucket(
|
| - self.wildcard_url.bucket_name, provider=self.wildcard_url.scheme,
|
| - fields=bucket_fields))
|
| - else:
|
| - regex = fnmatch.translate(self.wildcard_url.bucket_name)
|
| - prog = re.compile(regex)
|
| -
|
| - fields = self._GetToListFields(bucket_fields)
|
| - if fields:
|
| - fields.add('items/id')
|
| - for bucket in self.gsutil_api.ListBuckets(
|
| - fields=fields, project_id=self.project_id,
|
| - provider=self.wildcard_url.scheme):
|
| - if prog.match(bucket.id):
|
| - url = StorageUrlFromString(
|
| - '%s://%s/' % (self.wildcard_url.scheme, bucket.id))
|
| - yield BucketListingBucket(url, root_object=bucket)
|
| -
|
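| The bucket-wildcard branch above filters ListBuckets results with a regex
| derived from fnmatch. A small sketch of just that filtering step, with
| made-up bucket names for illustration:
|
|     import fnmatch
|     import re
|
|     bucket_names = ['my-logs', 'my-images', 'team-logs']
|     prog = re.compile(fnmatch.translate('my-*'))
|     # Keeps 'my-logs' and 'my-images', drops 'team-logs'.
|     matching = [name for name in bucket_names if prog.match(name)]
|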
| - def _GetToListFields(self, get_fields=None):
|
| - """Prepends 'items/' to the input fields and converts it to a set.
|
| -
|
| - This way field sets requested for GetBucket can be used in ListBucket calls.
|
| - Note that the input set must contain only bucket or object fields; listing
|
| - fields such as prefixes or nextPageToken should be added after calling
|
| - this function.
|
| -
|
| - Args:
|
| - get_fields: Iterable fields usable in GetBucket/GetObject calls.
|
| -
|
| - Returns:
|
| - Set of fields usable in ListBuckets/ListObjects calls.
|
| - """
|
| - if get_fields:
|
| - list_fields = set()
|
| - for field in get_fields:
|
| - list_fields.add('items/' + field)
|
| - return list_fields
|
| -
|
| - def _GetObjectRef(self, bucket_url_string, gcs_object, with_version=False):
|
| - """Creates a BucketListingRef of type OBJECT from the arguments.
|
| -
|
| - Args:
|
| - bucket_url_string: Wildcardless string describing the containing bucket.
|
| - gcs_object: gsutil_api root Object for populating the BucketListingRef.
|
| - with_version: If true, return a reference with a versioned string.
|
| -
|
| - Returns:
|
| - BucketListingRef of type OBJECT.
|
| - """
|
| - # Generation can be None in test mocks, so just return the
|
| - # live object for simplicity.
|
| - if with_version and gcs_object.generation is not None:
|
| - generation_str = GenerationFromUrlAndString(self.wildcard_url,
|
| - gcs_object.generation)
|
| - object_string = '%s%s#%s' % (bucket_url_string, gcs_object.name,
|
| - generation_str)
|
| - else:
|
| - object_string = '%s%s' % (bucket_url_string, gcs_object.name)
|
| - object_url = StorageUrlFromString(object_string)
|
| - return BucketListingObject(object_url, root_object=gcs_object)
|
| -
|
| - def _GetPrefixRef(self, bucket_url_string, prefix):
|
| - """Creates a BucketListingRef of type PREFIX from the arguments.
|
| -
|
| - Args:
|
| - bucket_url_string: Wildcardless string describing the containing bucket.
|
| - prefix: gsutil_api Prefix for populating the BucketListingRef
|
| -
|
| - Returns:
|
| - BucketListingRef of type PREFIX.
|
| - """
|
| - prefix_url = StorageUrlFromString('%s%s' % (bucket_url_string, prefix))
|
| - return BucketListingPrefix(prefix_url, root_object=prefix)
|
| -
|
| - def IterBuckets(self, bucket_fields=None):
|
| - """Iterates over the wildcard, returning refs for each expanded bucket.
|
| -
|
| - This ignores the object part of the URL entirely and expands only the
|
| -    bucket portion. It will yield BucketListingRefs of type BUCKET only.
|
| -
|
| - Args:
|
| - bucket_fields: Iterable fields to include in bucket listings.
|
| - Ex. ['defaultObjectAcl', 'logging']. This function is
|
| - responsible for converting these to listing-style
|
| - format ['items/defaultObjectAcl', 'items/logging'], as
|
| - well as adding any fields necessary for listing such as
|
| -                     'items/id'. API implementation is responsible for
|
| - adding pagination fields. If this is None, all fields are
|
| - returned.
|
| -
|
| - Yields:
|
| - BucketListingRef of type BUCKET, or empty iterator if no matches.
|
| - """
|
| - for blr in self._ExpandBucketWildcards(bucket_fields=bucket_fields):
|
| - yield blr
|
| -
|
| - def IterAll(self, bucket_listing_fields=None, expand_top_level_buckets=False):
|
| - """Iterates over the wildcard, yielding bucket, prefix or object refs.
|
| -
|
| - Args:
|
| - bucket_listing_fields: If present, populate only these metadata
|
| - fields for listed objects.
|
| - expand_top_level_buckets: If true and the wildcard expands only to
|
| - Bucket(s), yields the expansion of each bucket
|
| - into a top-level listing of prefixes and objects
|
| - in that bucket instead of a BucketListingRef
|
| - to that bucket.
|
| -
|
| - Yields:
|
| - BucketListingRef, or empty iterator if no matches.
|
| - """
|
| - for blr in self.__iter__(
|
| - bucket_listing_fields=bucket_listing_fields,
|
| - expand_top_level_buckets=expand_top_level_buckets):
|
| - yield blr
|
| -
|
| - def IterObjects(self, bucket_listing_fields=None):
|
| - """Iterates over the wildcard, yielding only object BucketListingRefs.
|
| -
|
| - Args:
|
| - bucket_listing_fields: If present, populate only these metadata
|
| - fields for listed objects.
|
| -
|
| - Yields:
|
| - BucketListingRefs of type OBJECT or empty iterator if no matches.
|
| - """
|
| - for blr in self.__iter__(bucket_listing_fields=bucket_listing_fields,
|
| - expand_top_level_buckets=True):
|
| - if blr.IsObject():
|
| - yield blr
|
| -
|
| -
|
| -class FileWildcardIterator(WildcardIterator):
|
| - """WildcardIterator subclass for files and directories.
|
| -
|
| - If you use recursive wildcards ('**') only a single such wildcard is
|
| - supported. For example you could use the wildcard '**/*.txt' to list all .txt
|
| - files in any subdirectory of the current directory, but you couldn't use a
|
| - wildcard like '**/abc/**/*.txt' (which would, if supported, let you find .txt
|
| - files in any subdirectory named 'abc').
|
| - """
|
| -
|
| - def __init__(self, wildcard_url, debug=0):
|
| - """Instantiates an iterator over BucketListingRefs matching wildcard URL.
|
| -
|
| - Args:
|
| - wildcard_url: FileUrl that contains the wildcard to iterate.
|
| - debug: Debug level (range 0..3).
|
| - """
|
| - self.wildcard_url = wildcard_url
|
| - self.debug = debug
|
| -
|
| - def __iter__(self):
|
| - """Iterator that gets called when iterating over the file wildcard.
|
| -
|
| - In the case where no wildcard is present, returns a single matching file
|
| - or directory.
|
| -
|
| - Raises:
|
| - WildcardException: if invalid wildcard found.
|
| -
|
| - Yields:
|
| - BucketListingRef of type OBJECT (for files) or PREFIX (for directories)
|
| - """
|
| - wildcard = self.wildcard_url.object_name
|
| - match = FLAT_LIST_REGEX.match(wildcard)
|
| - if match:
|
| - # Recursive wildcarding request ('.../**/...').
|
| - # Example input: wildcard = '/tmp/tmp2pQJAX/**/*'
|
| - base_dir = match.group('before')[:-1]
|
| - remaining_wildcard = match.group('after')
|
| - # At this point for the above example base_dir = '/tmp/tmp2pQJAX' and
|
| - # remaining_wildcard = '/*'
|
| - if remaining_wildcard.startswith('*'):
|
| - raise WildcardException('Invalid wildcard with more than 2 consecutive '
|
| - '*s (%s)' % wildcard)
|
| - # If there was no remaining wildcard past the recursive wildcard,
|
| - # treat it as if it were a '*'. For example, file://tmp/** is equivalent
|
| - # to file://tmp/**/*
|
| - if not remaining_wildcard:
|
| - remaining_wildcard = '*'
|
| - # Skip slash(es).
|
| - remaining_wildcard = remaining_wildcard.lstrip(os.sep)
|
| - filepaths = self._IterDir(base_dir, remaining_wildcard)
|
| - else:
|
| - # Not a recursive wildcarding request.
|
| - filepaths = glob.iglob(wildcard)
|
| - for filepath in filepaths:
|
| - expanded_url = StorageUrlFromString(filepath)
|
| - if os.path.isdir(filepath):
|
| - yield BucketListingPrefix(expanded_url)
|
| - else:
|
| - yield BucketListingObject(expanded_url)
|
| -
|
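| The recursive ('**') branch above splits the wildcard on the first '**'
| via FLAT_LIST_REGEX before walking the base directory. A sketch of that
| split, using the example path from the comments above:
|
|     import re
|
|     FLAT_LIST_REGEX = re.compile(r'(?P<before>.*?)\*\*(?P<after>.*)')
|
|     match = FLAT_LIST_REGEX.match('/tmp/tmp2pQJAX/**/*')
|     base_dir = match.group('before')[:-1]      # '/tmp/tmp2pQJAX'
|     remaining_wildcard = match.group('after')  # '/*'
|     # After lstrip(os.sep), the remaining '*' is matched against filenames
|     # found by os.walk(base_dir), as in _IterDir() below.
|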
| - def _IterDir(self, directory, wildcard):
|
| - """An iterator over the specified dir and wildcard."""
|
| - # UTF8-encode directory before passing it to os.walk() so if there are
|
| - # non-valid UTF8 chars in the file name (e.g., that can happen if the file
|
| - # originated on Windows) os.walk() will not attempt to decode and then die
|
| - # with a "codec can't decode byte" error, and instead we can catch the error
|
| - # at yield time and print a more informative error message.
|
| - for dirpath, unused_dirnames, filenames in os.walk(directory.encode(UTF8)):
|
| - for f in fnmatch.filter(filenames, wildcard):
|
| - try:
|
| - yield os.path.join(dirpath, f).decode(UTF8)
|
| - except UnicodeDecodeError:
|
| - # Note: We considered several ways to deal with this, but each had
|
| - # problems:
|
| - # 1. Raise an exception and try to catch in a higher layer (the
|
| - # gsutil cp command), so we can properly support the gsutil cp -c
|
| - # option. That doesn't work because raising an exception during
|
| - # iteration terminates the generator.
|
| - # 2. Accumulate a list of bad filenames and skip processing each
|
| - # during iteration, then raise at the end, with exception text
|
| - # printing the bad paths. That doesn't work because iteration is
|
| - # wrapped in PluralityCheckableIterator, so it's possible there
|
| - # are not-yet-performed copy operations at the time we reach the
|
| - # end of the iteration and raise the exception - which would cause
|
| - # us to skip copying validly named files. Moreover, the gsutil
|
| - # cp command loops over argv, so if you run the command gsutil cp
|
| - # -rc dir1 dir2 gs://bucket, an invalid unicode name inside dir1
|
| - # would cause dir2 never to be visited.
|
| - # 3. Print the invalid pathname and skip it during iteration. That
|
| - # would work but would mean gsutil cp could exit with status 0
|
| - # even though some files weren't copied.
|
| - # 4. Change the WildcardIterator to include an error status along with
|
| - # the result. That would solve the problem but would be a
|
| - # substantial change (WildcardIterator is used in many parts of
|
| - # gsutil), and we didn't feel that magnitude of change was
|
| - # warranted by this relatively uncommon corner case.
|
| - # Instead we chose to abort when one such file is encountered, and
|
| - # require the user to remove or rename the files and try again.
|
| - raise CommandException('\n'.join(textwrap.wrap(
|
| - 'Invalid Unicode path encountered (%s). gsutil cannot proceed '
|
| - 'with such files present. Please remove or rename this file and '
|
| - 'try again. NOTE: the path printed above replaces the '
|
| - 'problematic characters with a hex-encoded printable '
|
| - 'representation. For more details (including how to convert to a '
|
| - 'gsutil-compatible encoding) see `gsutil help encoding`.' %
|
| - repr(os.path.join(dirpath, f)))))
|
| -
|
| - # pylint: disable=unused-argument
|
| - def IterObjects(self, bucket_listing_fields=None):
|
| - """Iterates over the wildcard, yielding only object (file) refs.
|
| -
|
| - Args:
|
| - bucket_listing_fields: Ignored as filesystems don't have buckets.
|
| -
|
| - Yields:
|
| - BucketListingRefs of type OBJECT or empty iterator if no matches.
|
| - """
|
| - for bucket_listing_ref in self.IterAll():
|
| - if bucket_listing_ref.IsObject():
|
| - yield bucket_listing_ref
|
| -
|
| - # pylint: disable=unused-argument
|
| - def IterAll(self, bucket_listing_fields=None, expand_top_level_buckets=False):
|
| - """Iterates over the wildcard, yielding BucketListingRefs.
|
| -
|
| - Args:
|
| - bucket_listing_fields: Ignored; filesystems don't have buckets.
|
| - expand_top_level_buckets: Ignored; filesystems don't have buckets.
|
| -
|
| - Yields:
|
| - BucketListingRefs of type OBJECT (file) or PREFIX (directory),
|
| - or empty iterator if no matches.
|
| - """
|
| - for bucket_listing_ref in self.__iter__():
|
| - yield bucket_listing_ref
|
| -
|
| - def IterBuckets(self, unused_bucket_fields=None):
|
| - """Placeholder to allow polymorphic use of WildcardIterator.
|
| -
|
| - Args:
|
| - unused_bucket_fields: Ignored; filesystems don't have buckets.
|
| -
|
| - Raises:
|
| - WildcardException: in all cases.
|
| - """
|
| - raise WildcardException(
|
| - 'Iterating over Buckets not possible for file wildcards')
|
| -
|
| -
|
| -class WildcardException(StandardError):
|
| - """Exception raised for invalid wildcard URLs."""
|
| -
|
| - def __init__(self, reason):
|
| - StandardError.__init__(self)
|
| - self.reason = reason
|
| -
|
| - def __repr__(self):
|
| - return 'WildcardException: %s' % self.reason
|
| -
|
| - def __str__(self):
|
| - return 'WildcardException: %s' % self.reason
|
| -
|
| -
|
| -def CreateWildcardIterator(url_str, gsutil_api, all_versions=False, debug=0,
|
| - project_id=None):
|
| - """Instantiate a WildcardIterator for the given URL string.
|
| -
|
| - Args:
|
| - url_str: URL string naming wildcard object(s) to iterate.
|
| - gsutil_api: Cloud storage interface. Passed in for thread safety, also
|
| - settable for testing/mocking.
|
| - all_versions: If true, the iterator yields all versions of objects
|
| - matching the wildcard. If false, yields just the live
|
| - object version.
|
| - debug: Debug level to control debug output for iterator.
|
| - project_id: Project id to use for bucket listings.
|
| -
|
| - Returns:
|
| - A WildcardIterator that handles the requested iteration.
|
| - """
|
| -
|
| - url = StorageUrlFromString(url_str)
|
| - if url.IsFileUrl():
|
| - return FileWildcardIterator(url, debug=debug)
|
| - else: # Cloud URL
|
| - return CloudWildcardIterator(
|
| - url, gsutil_api, all_versions=all_versions, debug=debug,
|
| - project_id=project_id)
|
|
|
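| As a rough usage sketch of the factory (assuming gslib is importable and
| a local /tmp tree exists; cloud URLs would additionally need a configured
| gsutil_api object):
|
|     from gslib.wildcard_iterator import CreateWildcardIterator
|
|     # File URLs need no cloud API, so gsutil_api can be None here.
|     iterator = CreateWildcardIterator('file:///tmp/**/*.txt', None)
|     for blr in iterator.IterAll():
|         print(blr.url_string)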