Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(506)

Unified Diff: gslib/name_expansion.py

Issue 698893003: Update checked in version of gsutil to version 4.6 (Closed) Base URL: http://dart.googlecode.com/svn/third_party/gsutil/
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « gslib/ls_helper.py ('k') | gslib/no_op_auth_plugin.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: gslib/name_expansion.py
===================================================================
--- gslib/name_expansion.py (revision 33376)
+++ gslib/name_expansion.py (working copy)
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
# Copyright 2012 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -11,28 +12,32 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+"""Name expansion iterator and result classes.
-import copy
-import multiprocessing
-import wildcard_iterator
-
-from bucket_listing_ref import BucketListingRef
-from gslib.exception import CommandException
-from gslib.plurality_checkable_iterator import PluralityCheckableIterator
-from gslib.storage_uri_builder import StorageUriBuilder
-from wildcard_iterator import ContainsWildcard
-
-"""
Name expansion support for the various ways gsutil lets users refer to
collections of data (via explicit wildcarding as well as directory,
bucket, and bucket subdir implicit wildcarding). This class encapsulates
the various rules for determining how these expansions are done.
"""
+# Disable warnings for NameExpansionIteratorQueue functions; they implement
+# an interface which does not follow lint guidelines.
+# pylint: disable=invalid-name
+from __future__ import absolute_import
+
+import multiprocessing
+import os
+import sys
+
+from gslib.exception import CommandException
+from gslib.plurality_checkable_iterator import PluralityCheckableIterator
+import gslib.wildcard_iterator
+from gslib.wildcard_iterator import StorageUrlFromString
+
+
class NameExpansionResult(object):
- """
- Holds one fully expanded result from iterating over NameExpansionIterator.
+ """Holds one fully expanded result from iterating over NameExpansionIterator.
The member data in this class need to be pickleable because
NameExpansionResult instances are passed through Multiprocessing.Queue. In
@@ -42,139 +47,67 @@
significant overhead).
The state held in this object is needed for handling the various naming cases
- (e.g., copying from a single source URI to a directory generates different
- dest URI names than copying multiple URIs to a directory, to be consistent
+ (e.g., copying from a single source URL to a directory generates different
+ dest URL names than copying multiple URLs to a directory, to be consistent
with naming rules used by the Unix cp command). For more details see comments
in _NameExpansionIterator.
"""
- def __init__(self, src_uri_str, is_multi_src_request,
- src_uri_expands_to_multi, names_container, expanded_uri_str,
- have_existing_dst_container=None, is_latest=False):
- """
+ def __init__(self, source_storage_url, is_multi_source_request,
+ names_container, expanded_storage_url):
+ """Instantiates a result from name expansion.
+
Args:
- src_uri_str: string representation of StorageUri that was expanded.
- is_multi_src_request: bool indicator whether src_uri_str expanded to more
- than 1 BucketListingRef.
- src_uri_expands_to_multi: bool indicator whether the current src_uri
- expanded to more than 1 BucketListingRef.
- names_container: Bool indicator whether src_uri names a container.
- expanded_uri_str: string representation of StorageUri to which src_uri_str
- expands.
- have_existing_dst_container: bool indicator whether this is a copy
- request to an existing bucket, bucket subdir, or directory. Default
- None value should be used in cases where this is not needed (commands
- other than cp).
- is_latest: Bool indicating that the result represents the object's current
- version.
+ source_storage_url: StorageUrl that was being expanded.
+ is_multi_source_request: bool indicator whether the request has multiple
+ sources, i.e. multiple URL strings were given or source_storage_url
+ expanded to more than one BucketListingRef.
+ names_container: Bool indicator whether source_storage_url names a
+ container.
+ expanded_storage_url: StorageUrl that was expanded.
"""
- self.src_uri_str = src_uri_str
- self.is_multi_src_request = is_multi_src_request
- self.src_uri_expands_to_multi = src_uri_expands_to_multi
+ self.source_storage_url = source_storage_url
+ self.is_multi_source_request = is_multi_source_request
self.names_container = names_container
- self.expanded_uri_str = expanded_uri_str
- self.have_existing_dst_container = have_existing_dst_container
- self.is_latest = is_latest
+ self.expanded_storage_url = expanded_storage_url
def __repr__(self):
- return '%s' % self.expanded_uri_str
+ return '%s' % self._expanded_storage_url
- def IsEmpty(self):
- """Returns True if name expansion yielded no matches."""
- return self.expanded_blr is None
- def GetSrcUriStr(self):
- """Returns the string representation of the StorageUri that was expanded."""
- return self.src_uri_str
-
- def IsMultiSrcRequest(self):
- """
- Returns bool indicator whether name expansion resulted in more than 0
- BucketListingRef.
- """
- return self.is_multi_src_request
-
- def SrcUriExpandsToMulti(self):
- """
- Returns bool indicator whether the current src_uri expanded to more than
- 1 BucketListingRef
- """
- return self.src_uri_expands_to_multi
-
- def NamesContainer(self):
- """
- Returns bool indicator of whether src_uri names a directory, bucket, or
- bucket subdir.
- """
- return self.names_container
-
- def GetExpandedUriStr(self):
- """
- Returns the string representation of StorageUri to which src_uri_str
- expands.
- """
- return self.expanded_uri_str
-
- def HaveExistingDstContainer(self):
- """Returns bool indicator whether this is a copy request to an
- existing bucket, bucket subdir, or directory, or None if not
- relevant."""
- return self.have_existing_dst_container
-
-
class _NameExpansionIterator(object):
- """
- Iterates over all src_uris, expanding wildcards, object-less bucket names,
- subdir bucket names, and directory names, generating a flat listing of all
- the matching objects/files.
+ """Class that iterates over all source URLs passed to the iterator.
- You should instantiate this object using the static factory function
- NameExpansionIterator, because consumers of this iterator need the
- PluralityCheckableIterator wrapper built by that function.
-
- Yields:
- gslib.name_expansion.NameExpansionResult.
-
- Raises:
- CommandException: if errors encountered.
+ See details in __iter__ function doc.
"""
- def __init__(self, command_name, proj_id_handler, headers, debug, logger,
- bucket_storage_uri_class, uri_strs, recursion_requested,
- have_existing_dst_container=None, flat=True,
- all_versions=False, for_all_version_delete=False,
- cmd_supports_recursion=True):
- """
+ def __init__(self, command_name, debug, logger, gsutil_api, url_strs,
+ recursion_requested, all_versions=False,
+ cmd_supports_recursion=True, project_id=None,
+ continue_on_error=False):
+ """Creates a NameExpansionIterator.
+
Args:
command_name: name of command being run.
- proj_id_handler: ProjectIdHandler to use for current command.
- headers: Dictionary containing optional HTTP headers to pass to boto.
- debug: Debug level to pass in to boto connection (range 0..3).
+ debug: Debug level to pass to underlying iterators (range 0..3).
logger: logging.Logger object.
- bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
- Settable for testing/mocking.
- uri_strs: PluralityCheckableIterator of URI strings needing expansion.
- recursion_requested: True if -R specified on command-line.
- have_existing_dst_container: Bool indicator whether this is a copy
- request to an existing bucket, bucket subdir, or directory. Default
- None value should be used in cases where this is not needed (commands
- other than cp).
- flat: Bool indicating whether bucket listings should be flattened, i.e.,
- so the mapped-to results contain objects spanning subdirectories.
+ gsutil_api: Cloud storage interface. Settable for testing/mocking.
+ url_strs: PluralityCheckableIterator of URL strings needing expansion.
+ recursion_requested: True if -R specified on command-line. If so,
+ listings will be flattened so mapped-to results contain objects
+ spanning subdirectories.
all_versions: Bool indicating whether to iterate over all object versions.
- for_all_version_delete: Bool indicating whether this is for an all-version
- delete.
- cmd_supports_recursion: Bool indicating whether this command supports a '-R'
- flag. Useful for printing helpful error messages.
+ cmd_supports_recursion: Bool indicating whether this command supports a
+ '-R' flag. Useful for printing helpful error messages.
+ project_id: Project id to use for bucket retrieval.
+ continue_on_error: If true, yield no-match exceptions encountered during
+ iteration instead of raising them.
- Examples of _NameExpansionIterator with flat=True:
- - Calling with one of the uri_strs being 'gs://bucket' will enumerate all
+ Examples of _NameExpansionIterator with recursion_requested=True:
+ - Calling with one of the url_strs being 'gs://bucket' will enumerate all
top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'.
- 'gs://bucket/**' will enumerate all objects in the bucket.
- - 'gs://bucket/abc' will enumerate all next-level objects under directory
- abc (i.e., not including subdirectories of abc) if gs://bucket/abc/*
- matches any objects; otherwise it will enumerate the single name
- gs://bucket/abc
+ - 'gs://bucket/abc' will enumerate either the single object abc or, if
+ abc is a subdirectory, all objects under abc and any of its
+ subdirectories.
- 'gs://bucket/abc/**' will enumerate all objects under abc or any of its
subdirectories.
- 'file:///tmp' will enumerate all files under /tmp, as will
@@ -182,8 +115,9 @@
- 'file:///tmp/**' will enumerate all files under /tmp or any of its
subdirectories.
- Example if flat=False: calling with gs://bucket/abc/* lists matching objects
- or subdirs, but not sub-subdirs or objects beneath subdirs.
+ Example if recursion_requested=False:
+ calling with gs://bucket/abc/* lists matching objects
+ or subdirs, but not sub-subdirs or objects beneath subdirs.
Note: In step-by-step comments below we give examples assuming there's a
gs://bucket with object paths:
@@ -197,205 +131,219 @@
dir/c/
"""
self.command_name = command_name
- self.proj_id_handler = proj_id_handler
- self.headers = headers
self.debug = debug
self.logger = logger
- self.bucket_storage_uri_class = bucket_storage_uri_class
- self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class)
- self.uri_strs = uri_strs
+ self.gsutil_api = gsutil_api
+ self.url_strs = url_strs
self.recursion_requested = recursion_requested
- self.have_existing_dst_container = have_existing_dst_container
- self.flat = flat
self.all_versions = all_versions
- # Check self.uri_strs.has_plurality() at start because its value can change
- # if uri_strs is itself an iterator.
- self.uri_strs.has_plurality = self.uri_strs.has_plurality()
+ # Check self.url_strs.HasPlurality() at start because its value can change
+ # if url_strs is itself an iterator.
+ self.url_strs.has_plurality = self.url_strs.HasPlurality()
self.cmd_supports_recursion = cmd_supports_recursion
+ self.project_id = project_id
+ self.continue_on_error = continue_on_error
# Map holding wildcard strings to use for flat vs subdir-by-subdir listings.
# (A flat listing means show all objects expanded all the way down.)
self._flatness_wildcard = {True: '**', False: '*'}
def __iter__(self):
- for uri_str in self.uri_strs:
+ """Iterates over all source URLs passed to the iterator.
+
+ For each src url, expands wildcards, object-less bucket names,
+ subdir bucket names, and directory names, and generates a flat listing of
+ all the matching objects/files.
+
+ You should instantiate this object using the static factory function
+ NameExpansionIterator, because consumers of this iterator need the
+ PluralityCheckableIterator wrapper built by that function.
+
+ Yields:
+ gslib.name_expansion.NameExpansionResult.
+
+ Raises:
+ CommandException: if errors encountered.
+ """
+ for url_str in self.url_strs:
+ storage_url = StorageUrlFromString(url_str)
+
+ if storage_url.IsFileUrl() and storage_url.IsStream():
+ if self.url_strs.has_plurality:
+ raise CommandException('Multiple URL strings are not supported '
+ 'with streaming ("-") URLs.')
+ yield NameExpansionResult(storage_url, False, False, storage_url)
+ continue
+
# Step 1: Expand any explicitly specified wildcards. The output from this
# step is an iterator of BucketListingRef.
# Starting with gs://buck*/abc* this step would expand to gs://bucket/abcd
- if ContainsWildcard(uri_str):
- post_step1_iter = self._WildcardIterator(uri_str)
+
+ src_names_bucket = False
+ if (storage_url.IsCloudUrl() and storage_url.IsBucket()
+ and not self.recursion_requested):
+ # UNIX commands like rm and cp will omit directory references.
+ # If url_str refers only to buckets and we are not recursing,
+ # then produce references of type BUCKET, because they are guaranteed
+ # to pass through Step 2 and be omitted in Step 3.
+ post_step1_iter = PluralityCheckableIterator(
+ self.WildcardIterator(url_str).IterBuckets(
+ bucket_fields=['id']))
else:
- suri = self.suri_builder.StorageUri(uri_str)
- post_step1_iter = iter([BucketListingRef(suri)])
- post_step1_iter = PluralityCheckableIterator(post_step1_iter)
+ # Get a list of objects and prefixes, expanding the top level for
+ # any listed buckets. If our source is a bucket, however, we need
+ # to treat all of the top level expansions as names_container=True.
+ post_step1_iter = PluralityCheckableIterator(
+ self.WildcardIterator(url_str).IterAll(
+ bucket_listing_fields=['name'],
+ expand_top_level_buckets=True))
+ if storage_url.IsCloudUrl() and storage_url.IsBucket():
+ src_names_bucket = True
- # Step 2: Expand bucket subdirs and versions. The output from this
+ # Step 2: Expand bucket subdirs. The output from this
# step is an iterator of (names_container, BucketListingRef).
# Starting with gs://bucket/abcd this step would expand to:
# iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]).
- if self.flat and self.recursion_requested:
- post_step2_iter = _ImplicitBucketSubdirIterator(self,
- post_step1_iter, self.flat)
- elif self.all_versions:
- post_step2_iter = _AllVersionIterator(self, post_step1_iter,
- headers=self.headers)
+ subdir_exp_wildcard = self._flatness_wildcard[self.recursion_requested]
+ if self.recursion_requested:
+ post_step2_iter = _ImplicitBucketSubdirIterator(
+ self, post_step1_iter, subdir_exp_wildcard)
else:
post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter)
post_step2_iter = PluralityCheckableIterator(post_step2_iter)
- # Step 3. Expand directories and buckets. This step yields the iterated
+ # Because we actually perform and check object listings here, this will
+ # raise if url_strs includes a non-existent object. However,
+ # plurality_checkable_iterator will buffer the exception for us, not
+ # raising it until the iterator is actually asked to yield the first
+ # result.
+ if post_step2_iter.IsEmpty():
+ if self.continue_on_error:
+ try:
+ raise CommandException('No URLs matched: %s' % url_str)
+ except CommandException, e:
+ # Yield a specialized tuple of (exception, stack_trace) to
+ # the wrapping PluralityCheckableIterator.
+ yield (e, sys.exc_info()[2])
+ else:
+ raise CommandException('No URLs matched: %s' % url_str)
+
+ # Step 3. Omit any directories, buckets, or bucket subdirectories for
+ # non-recursive expansions.
+ post_step3_iter = PluralityCheckableIterator(_OmitNonRecursiveIterator(
+ post_step2_iter, self.recursion_requested, self.command_name,
+ self.cmd_supports_recursion, self.logger))
+
+ src_url_expands_to_multi = post_step3_iter.HasPlurality()
+ is_multi_source_request = (self.url_strs.has_plurality
+ or src_url_expands_to_multi)
+
+ # Step 4. Expand directories and buckets. This step yields the iterated
# values. Starting with gs://bucket this step would expand to:
# [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
# Starting with file://dir this step would expand to:
# [dir/a.txt, dir/b.txt, dir/c/]
- exp_src_bucket_listing_refs = []
- wc = self._flatness_wildcard[self.flat]
- src_uri_expands_to_multi = (post_step1_iter.has_plurality()
- or post_step2_iter.has_plurality())
- is_multi_src_request = (self.uri_strs.has_plurality
- or src_uri_expands_to_multi)
+ for (names_container, blr) in post_step3_iter:
+ src_names_container = src_names_bucket or names_container
- if post_step2_iter.is_empty():
- raise CommandException('No URIs matched: %s' % uri_str)
- for (names_container, blr) in post_step2_iter:
- if (not blr.GetUri().names_container()
- and (self.flat or not blr.HasPrefix())):
- yield NameExpansionResult(uri_str, is_multi_src_request,
- src_uri_expands_to_multi, names_container,
- blr.GetUriString(),
- self.have_existing_dst_container,
- is_latest=blr.IsLatest())
- continue
- if not self.recursion_requested:
- if blr.GetUri().is_file_uri():
- desc = 'directory'
- elif blr.GetUri().names_bucket():
- desc = 'bucket'
- else:
- desc = 'bucket subdir'
- if self.cmd_supports_recursion:
- self.logger.info(
- 'Omitting %s "%s". (Did you mean to do %s -R?)',
- desc, blr.GetUri(), self.command_name)
- else:
- self.logger.info('Omitting %s "%s".', desc, blr.GetUri())
- continue
- if blr.GetUri().is_file_uri():
- # Convert dir to implicit recursive wildcard.
- uri_to_iterate = '%s/%s' % (blr.GetUriString(), wc)
+ if blr.IsObject():
+ yield NameExpansionResult(
+ storage_url, is_multi_source_request, src_names_container,
+ blr.storage_url)
else:
- # Convert bucket to implicit recursive wildcard.
- uri_to_iterate = blr.GetUri().clone_replace_name(wc)
- wc_iter = PluralityCheckableIterator(
- self._WildcardIterator(uri_to_iterate))
- src_uri_expands_to_multi = (src_uri_expands_to_multi
- or wc_iter.has_plurality())
- is_multi_src_request = (self.uri_strs.has_plurality
- or src_uri_expands_to_multi)
- for blr in wc_iter:
- yield NameExpansionResult(uri_str, is_multi_src_request,
- src_uri_expands_to_multi, True,
- blr.GetUriString(),
- self.have_existing_dst_container,
- is_latest=blr.IsLatest())
+ # Use implicit wildcarding to do the enumeration.
+ # At this point we are guaranteed that:
+ # - Recursion has been requested because non-object entries are
+ # filtered in step 3 otherwise.
+ # - This is a prefix or bucket subdirectory because only
+ # non-recursive iterations produce bucket references.
+ expanded_url = StorageUrlFromString(blr.url_string)
+ if expanded_url.IsFileUrl():
+ # Convert dir to implicit recursive wildcard.
+ url_to_iterate = '%s%s%s' % (blr, os.sep, subdir_exp_wildcard)
+ else:
+ # Convert subdir to implicit recursive wildcard.
+ url_to_iterate = expanded_url.CreatePrefixUrl(
+ wildcard_suffix=subdir_exp_wildcard)
- def _WildcardIterator(self, uri_or_str):
- """
- Helper to instantiate gslib.WildcardIterator. Args are same as
- gslib.WildcardIterator interface, but this method fills in most of the
- values from instance state.
+ wc_iter = PluralityCheckableIterator(
+ self.WildcardIterator(url_to_iterate).IterObjects(
+ bucket_listing_fields=['name']))
+ src_url_expands_to_multi = (src_url_expands_to_multi
+ or wc_iter.HasPlurality())
+ is_multi_source_request = (self.url_strs.has_plurality
+ or src_url_expands_to_multi)
+ # This will be a flattened listing of all underlying objects in the
+ # subdir.
+ for blr in wc_iter:
+ yield NameExpansionResult(
+ storage_url, is_multi_source_request, True, blr.storage_url)
+ def WildcardIterator(self, url_string):
+ """Helper to instantiate gslib.WildcardIterator.
+
+ Args are same as gslib.WildcardIterator interface, but this method fills
+ in most of the values from instance state.
+
Args:
- uri_or_str: StorageUri or URI string naming wildcard objects to iterate.
+ url_string: URL string naming wildcard objects to iterate.
+
+ Returns:
+ Wildcard iterator over URL string.
"""
- return wildcard_iterator.wildcard_iterator(
- uri_or_str, self.proj_id_handler,
- bucket_storage_uri_class=self.bucket_storage_uri_class,
- headers=self.headers, debug=self.debug,
- all_versions=self.all_versions)
+ return gslib.wildcard_iterator.CreateWildcardIterator(
+ url_string, self.gsutil_api, debug=self.debug,
+ all_versions=self.all_versions,
+ project_id=self.project_id)
-def NameExpansionIterator(command_name, proj_id_handler, headers, debug,
- logger, bucket_storage_uri_class, uri_strs,
- recursion_requested,
- have_existing_dst_container=None, flat=True,
- all_versions=False,
- for_all_version_delete=False,
- cmd_supports_recursion=True):
- """
- Static factory function for instantiating _NameExpansionIterator, which
- wraps the resulting iterator in a PluralityCheckableIterator and checks
- that it is non-empty. Also, allows uri_strs can be either an array or an
+def NameExpansionIterator(command_name, debug, logger, gsutil_api, url_strs,
+ recursion_requested, all_versions=False,
+ cmd_supports_recursion=True, project_id=None,
+ continue_on_error=False):
+ """Static factory function for instantiating _NameExpansionIterator.
+
+ This wraps the resulting iterator in a PluralityCheckableIterator and checks
+ that it is non-empty. Also, allows url_strs to be either an array or an
iterator.
Args:
command_name: name of command being run.
- proj_id_handler: ProjectIdHandler to use for current command.
- headers: Dictionary containing optional HTTP headers to pass to boto.
- debug: Debug level to pass in to boto connection (range 0..3).
+ debug: Debug level to pass to underlying iterators (range 0..3).
logger: logging.Logger object.
- bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
- Settable for testing/mocking.
- uri_strs: PluralityCheckableIterator of URI strings needing expansion.
- recursion_requested: True if -R specified on command-line.
- have_existing_dst_container: Bool indicator whether this is a copy
- request to an existing bucket, bucket subdir, or directory. Default
- None value should be used in cases where this is not needed (commands
- other than cp).
- flat: Bool indicating whether bucket listings should be flattened, i.e.,
- so the mapped-to results contain objects spanning subdirectories.
+ gsutil_api: Cloud storage interface. Settable for testing/mocking.
+ url_strs: Iterable URL strings needing expansion.
+ recursion_requested: True if -R specified on command-line. If so,
+ listings will be flattened so mapped-to results contain objects
+ spanning subdirectories.
all_versions: Bool indicating whether to iterate over all object versions.
- for_all_version_delete: Bool indicating whether this is for an all-version
- delete.
cmd_supports_recursion: Bool indicating whether this command supports a '-R'
flag. Useful for printing helpful error messages.
+ project_id: Project id to use for the current command.
+ continue_on_error: If true, yield no-match exceptions encountered during
+ iteration instead of raising them.
- Examples of ExpandWildcardsAndContainers with flat=True:
- - Calling with one of the uri_strs being 'gs://bucket' will enumerate all
- top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'.
- - 'gs://bucket/**' will enumerate all objects in the bucket.
- - 'gs://bucket/abc' will enumerate all next-level objects under directory
- abc (i.e., not including subdirectories of abc) if gs://bucket/abc/*
- matches any objects; otherwise it will enumerate the single name
- gs://bucket/abc
- - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its
- subdirectories.
- - 'file:///tmp' will enumerate all files under /tmp, as will
- 'file:///tmp/*'
- - 'file:///tmp/**' will enumerate all files under /tmp or any of its
- subdirectories.
+ Raises:
+ CommandException if underlying iterator is empty.
- Example if flat=False: calling with gs://bucket/abc/* lists matching objects
- or subdirs, but not sub-subdirs or objects beneath subdirs.
+ Returns:
+ Name expansion iterator instance.
- Note: In step-by-step comments below we give examples assuming there's a
- gs://bucket with object paths:
- abcd/o1.txt
- abcd/o2.txt
- xyz/o1.txt
- xyz/o2.txt
- and a directory file://dir with file paths:
- dir/a.txt
- dir/b.txt
- dir/c/
+ For example semantics, see comments in _NameExpansionIterator.__init__.
"""
- uri_strs = PluralityCheckableIterator(uri_strs)
+ url_strs = PluralityCheckableIterator(url_strs)
name_expansion_iterator = _NameExpansionIterator(
- command_name, proj_id_handler, headers, debug, logger,
- bucket_storage_uri_class, uri_strs, recursion_requested,
- have_existing_dst_container, flat, all_versions=all_versions,
- for_all_version_delete=for_all_version_delete,
- cmd_supports_recursion=cmd_supports_recursion)
+ command_name, debug, logger, gsutil_api, url_strs, recursion_requested,
+ all_versions=all_versions, cmd_supports_recursion=cmd_supports_recursion,
+ project_id=project_id, continue_on_error=continue_on_error)
name_expansion_iterator = PluralityCheckableIterator(name_expansion_iterator)
- if name_expansion_iterator.is_empty():
- raise CommandException('No URIs matched')
+ if name_expansion_iterator.IsEmpty():
+ raise CommandException('No URLs matched')
return name_expansion_iterator
class NameExpansionIteratorQueue(object):
- """
- Wrapper around NameExpansionIterator that provides a Multiprocessing.Queue
- facade.
+ """Wrapper around NameExpansionIterator with Multiprocessing.Queue interface.
Only a blocking get() function can be called, and the block and timeout
params on that function are ignored. All other class functions raise
@@ -411,28 +359,30 @@
def qsize(self):
raise NotImplementedError(
- "NameExpansionIteratorQueue.qsize() not implemented")
+ 'NameExpansionIteratorQueue.qsize() not implemented')
def empty(self):
raise NotImplementedError(
- "NameExpansionIteratorQueue.empty() not implemented")
+ 'NameExpansionIteratorQueue.empty() not implemented')
def full(self):
raise NotImplementedError(
- "NameExpansionIteratorQueue.full() not implemented")
+ 'NameExpansionIteratorQueue.full() not implemented')
+ # pylint: disable=unused-argument
def put(self, obj=None, block=None, timeout=None):
raise NotImplementedError(
- "NameExpansionIteratorQueue.put() not implemented")
+ 'NameExpansionIteratorQueue.put() not implemented')
def put_nowait(self, obj):
raise NotImplementedError(
- "NameExpansionIteratorQueue.put_nowait() not implemented")
+ 'NameExpansionIteratorQueue.put_nowait() not implemented')
+ # pylint: disable=unused-argument
def get(self, block=None, timeout=None):
self.lock.acquire()
try:
- if self.name_expansion_iterator.is_empty():
+ if self.name_expansion_iterator.IsEmpty():
return self.final_value
return self.name_expansion_iterator.next()
finally:
@@ -440,34 +390,35 @@
def get_nowait(self):
raise NotImplementedError(
- "NameExpansionIteratorQueue.get_nowait() not implemented")
+ 'NameExpansionIteratorQueue.get_nowait() not implemented')
def get_no_wait(self):
raise NotImplementedError(
- "NameExpansionIteratorQueue.get_no_wait() not implemented")
+ 'NameExpansionIteratorQueue.get_no_wait() not implemented')
def close(self):
- raise NotImplementedError(
- "NameExpansionIteratorQueue.close() not implemented")
+ raise NotImplementedError(
+ 'NameExpansionIteratorQueue.close() not implemented')
def join_thread(self):
raise NotImplementedError(
- "NameExpansionIteratorQueue.join_thread() not implemented")
+ 'NameExpansionIteratorQueue.join_thread() not implemented')
def cancel_join_thread(self):
raise NotImplementedError(
- "NameExpansionIteratorQueue.cancel_join_thread() not implemented")
+ 'NameExpansionIteratorQueue.cancel_join_thread() not implemented')
class _NonContainerTuplifyIterator(object):
- """
- Iterator that produces the tuple (False, blr) for each iteration
- of blr_iter. Used for cases where blr_iter iterates over a set of
+ """Iterator that produces the tuple (False, blr) for each iterated value.
+
+ Used for cases where blr_iter iterates over a set of
BucketListingRefs known not to name containers.
"""
def __init__(self, blr_iter):
- """
+ """Instantiates iterator.
+
Args:
blr_iter: iterator of BucketListingRef.
"""
@@ -478,94 +429,102 @@
yield (False, blr)
-class _ImplicitBucketSubdirIterator(object):
+class _OmitNonRecursiveIterator(object):
+ """Iterator wrapper that omits certain values for non-recursive requests.
+ This iterates over tuples of (names_container, BucketListingRef) and
+ omits directories, prefixes, and buckets from non-recursive requests
+ so that we can properly calculate whether the source URL expands to multiple
+ URLs.
+
+ For example, if we have a bucket containing two objects: bucket/foo and
+ bucket/foo/bar and we do a non-recursive iteration, only bucket/foo will be
+ yielded.
"""
- Iterator wrapper that iterates over blr_iter, performing implicit bucket
- subdir expansion.
+ def __init__(self, tuple_iter, recursion_requested, command_name,
+ cmd_supports_recursion, logger):
+ """Instantiates the iterator.
+
+ Args:
+ tuple_iter: Iterator over (names_container, BucketListingRef) tuples
+ from step 2 in the NameExpansionIterator
+ recursion_requested: If false, omit buckets, dirs, and subdirs
+ command_name: Command name for user messages
+ cmd_supports_recursion: Command recursion support for user messages
+ logger: Log object for user messages
+ """
+ self.tuple_iter = tuple_iter
+ self.recursion_requested = recursion_requested
+ self.command_name = command_name
+ self.cmd_supports_recursion = cmd_supports_recursion
+ self.logger = logger
+
+ def __iter__(self):
+ for (names_container, blr) in self.tuple_iter:
+ if not self.recursion_requested and not blr.IsObject():
+ # At this point we either have a bucket or a prefix,
+ # so if recursion is not requested, we're going to omit it.
+ expanded_url = StorageUrlFromString(blr.url_string)
+ if expanded_url.IsFileUrl():
+ desc = 'directory'
+ else:
+ desc = blr.type_name
+ if self.cmd_supports_recursion:
+ self.logger.info(
+ 'Omitting %s "%s". (Did you mean to do %s -R?)',
+ desc, blr.url_string, self.command_name)
+ else:
+ self.logger.info('Omitting %s "%s".', desc, blr.url_string)
+ else:
+ yield (names_container, blr)
+
+
+class _ImplicitBucketSubdirIterator(object):
+ """Iterator wrapper that performs implicit bucket subdir expansion.
+
Each iteration yields tuple (names_container, expanded BucketListingRefs)
- where names_container is true if URI names a directory, bucket,
- or bucket subdir (vs how StorageUri.names_container() doesn't
- handle latter case).
+ where names_container is true if URL names a directory, bucket,
+ or bucket subdir.
For example, iterating over [BucketListingRef("gs://abc")] would expand to:
[BucketListingRef("gs://abc/o1"), BucketListingRef("gs://abc/o2")]
if those subdir objects exist, and [BucketListingRef("gs://abc")] otherwise.
"""
- def __init__(self, name_expansion_instance, blr_iter, flat):
- """
+ def __init__(self, name_exp_instance, blr_iter, subdir_exp_wildcard):
+ """Instantiates the iterator.
+
Args:
- name_expansion_instance: calling instance of NameExpansion class.
- blr_iter: iterator of BucketListingRef.
- flat: bool indicating whether bucket listings should be flattened, i.e.,
- so the mapped-to results contain objects spanning subdirectories.
+ name_exp_instance: calling instance of NameExpansion class.
+ blr_iter: iterator over BucketListingRef prefixes and objects.
+ subdir_exp_wildcard: wildcard for expanding subdirectories;
+ expected values are ** if the mapped-to results should contain
+ objects spanning subdirectories, or * if only one level should
+ be listed.
"""
self.blr_iter = blr_iter
- self.name_expansion_instance = name_expansion_instance
- self.flat = flat
+ self.name_exp_instance = name_exp_instance
+ self.subdir_exp_wildcard = subdir_exp_wildcard
def __iter__(self):
for blr in self.blr_iter:
- uri = blr.GetUri()
- if uri.names_object():
- # URI could be a bucket subdir.
+ if blr.IsPrefix():
+ # This is a bucket subdirectory, list objects according to the wildcard.
+ prefix_url = StorageUrlFromString(blr.url_string).CreatePrefixUrl(
+ wildcard_suffix=self.subdir_exp_wildcard)
implicit_subdir_iterator = PluralityCheckableIterator(
- self.name_expansion_instance._WildcardIterator(
- self.name_expansion_instance.suri_builder.StorageUri(
- '%s/%s' % (uri.uri.rstrip('/'),
- self.name_expansion_instance._flatness_wildcard[
- self.flat]))))
- if not implicit_subdir_iterator.is_empty():
+ self.name_exp_instance.WildcardIterator(
+ prefix_url).IterAll(bucket_listing_fields=['name']))
+ if not implicit_subdir_iterator.IsEmpty():
for exp_blr in implicit_subdir_iterator:
yield (True, exp_blr)
else:
+ # Prefix that contains no objects, for example in the $folder$ case
+ # or an empty filesystem directory.
yield (False, blr)
- else:
+ elif blr.IsObject():
yield (False, blr)
-
-class _AllVersionIterator(object):
- """
- Iterator wrapper that iterates over blr_iter, performing implicit version
- expansion.
-
- Output behavior is identical to that in _ImplicitBucketSubdirIterator above.
-
- For example, iterating over [BucketListingRef("gs://abc/o1")] would expand to:
- [BucketListingRef("gs://abc/o1#1234"), BucketListingRef("gs://abc/o1#1235")]
- """
-
- def __init__(self, name_expansion_instance, blr_iter, headers=None):
- """
- Args:
- name_expansion_instance: calling instance of NameExpansion class.
- blr_iter: iterator of BucketListingRef.
- flat: bool indicating whether bucket listings should be flattened, i.e.,
- so the mapped-to results contain objects spanning subdirectories.
- """
- self.blr_iter = blr_iter
- self.name_expansion_instance = name_expansion_instance
- self.headers = headers
-
- def __iter__(self):
- empty = True
- for blr in self.blr_iter:
- uri = blr.GetUri()
- if not uri.names_object():
- empty = False
- yield (True, blr)
- break
- for key in uri.list_bucket(
- prefix=uri.object_name, headers=self.headers, all_versions=True):
- if key.name != uri.object_name:
- # The desired entries will be alphabetically first in this listing.
- break
- version_blr = BucketListingRef(uri.clone_replace_key(key), key=key)
- empty = False
- yield (False, version_blr)
- # If no version exists, yield the unversioned blr, and let the consuming
- # operation fail. This mirrors behavior in _ImplicitBucketSubdirIterator.
- if empty:
- yield (False, blr)
-
+ else:
+ raise CommandException(
+ '_ImplicitBucketSubdirIterator got a bucket reference %s' % blr)
« no previous file with comments | « gslib/ls_helper.py ('k') | gslib/no_op_auth_plugin.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698