| OLD | NEW |
| 1 # -*- coding: utf-8 -*- |
| 1 # Copyright 2012 Google Inc. All Rights Reserved. | 2 # Copyright 2012 Google Inc. All Rights Reserved. |
| 2 # | 3 # |
| 3 # Licensed under the Apache License, Version 2.0 (the "License"); | 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 # you may not use this file except in compliance with the License. | 5 # you may not use this file except in compliance with the License. |
| 5 # You may obtain a copy of the License at | 6 # You may obtain a copy of the License at |
| 6 # | 7 # |
| 7 # http://www.apache.org/licenses/LICENSE-2.0 | 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 # | 9 # |
| 9 # Unless required by applicable law or agreed to in writing, software | 10 # Unless required by applicable law or agreed to in writing, software |
| 10 # distributed under the License is distributed on an "AS IS" BASIS, | 11 # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 # See the License for the specific language governing permissions and | 13 # See the License for the specific language governing permissions and |
| 13 # limitations under the License. | 14 # limitations under the License. |
| 15 """Name expansion iterator and result classes. |
| 14 | 16 |
| 15 import copy | |
| 16 import multiprocessing | |
| 17 import wildcard_iterator | |
| 18 | |
| 19 from bucket_listing_ref import BucketListingRef | |
| 20 from gslib.exception import CommandException | |
| 21 from gslib.plurality_checkable_iterator import PluralityCheckableIterator | |
| 22 from gslib.storage_uri_builder import StorageUriBuilder | |
| 23 from wildcard_iterator import ContainsWildcard | |
| 24 | |
| 25 """ | |
| 26 Name expansion support for the various ways gsutil lets users refer to | 17 Name expansion support for the various ways gsutil lets users refer to |
| 27 collections of data (via explicit wildcarding as well as directory, | 18 collections of data (via explicit wildcarding as well as directory, |
| 28 bucket, and bucket subdir implicit wildcarding). This class encapsulates | 19 bucket, and bucket subdir implicit wildcarding). This class encapsulates |
| 29 the various rules for determining how these expansions are done. | 20 the various rules for determining how these expansions are done. |
| 30 """ | 21 """ |
| 31 | 22 |
| 23 # Disable warnings for NameExpansionIteratorQueue functions; they implement |
| 24 # an interface which does not follow lint guidelines. |
| 25 # pylint: disable=invalid-name |
| 26 |
| 27 from __future__ import absolute_import |
| 28 |
| 29 import multiprocessing |
| 30 import os |
| 31 import sys |
| 32 |
| 33 from gslib.exception import CommandException |
| 34 from gslib.plurality_checkable_iterator import PluralityCheckableIterator |
| 35 import gslib.wildcard_iterator |
| 36 from gslib.wildcard_iterator import StorageUrlFromString |
| 37 |
| 32 | 38 |
| 33 class NameExpansionResult(object): | 39 class NameExpansionResult(object): |
| 34 """ | 40 """Holds one fully expanded result from iterating over NameExpansionIterator. |
| 35 Holds one fully expanded result from iterating over NameExpansionIterator. | |
| 36 | 41 |
| 37 The member data in this class need to be pickleable because | 42 The member data in this class need to be pickleable because |
| 38 NameExpansionResult instances are passed through Multiprocessing.Queue. In | 43 NameExpansionResult instances are passed through Multiprocessing.Queue. In |
| 39 particular, don't include any boto state like StorageUri, since that pulls | 44 particular, don't include any boto state like StorageUri, since that pulls |
| 40 in a big tree of objects, some of which aren't pickleable (and even if | 45 in a big tree of objects, some of which aren't pickleable (and even if |
| 41 they were, pickling/unpickling such a large object tree would result in | 46 they were, pickling/unpickling such a large object tree would result in |
| 42 significant overhead). | 47 significant overhead). |
| 43 | 48 |
| 44 The state held in this object is needed for handling the various naming cases | 49 The state held in this object is needed for handling the various naming cases |
| 45 (e.g., copying from a single source URI to a directory generates different | 50 (e.g., copying from a single source URL to a directory generates different |
| 46 dest URI names than copying multiple URIs to a directory, to be consistent | 51 dest URL names than copying multiple URLs to a directory, to be consistent |
| 47 with naming rules used by the Unix cp command). For more details see comments | 52 with naming rules used by the Unix cp command). For more details see comments |
| 48 in _NameExpansionIterator. | 53 in _NameExpansionIterator. |
| 49 """ | 54 """ |
| 50 | 55 |
| 51 def __init__(self, src_uri_str, is_multi_src_request, | 56 def __init__(self, source_storage_url, is_multi_source_request, |
| 52 src_uri_expands_to_multi, names_container, expanded_uri_str, | 57 names_container, expanded_storage_url): |
| 53 have_existing_dst_container=None, is_latest=False): | 58 """Instantiates a result from name expansion. |
| 59 |
| 60 Args: |
| 61 source_storage_url: StorageUrl that was being expanded. |
| 62 is_multi_source_request: bool indicator whether src_url_str expanded to |
| 63 more than one BucketListingRef. |
| 64 names_container: Bool indicator whether src_url names a container. |
| 65 expanded_storage_url: StorageUrl that was expanded. |
| 54 """ | 66 """ |
| 55 Args: | 67 self.source_storage_url = source_storage_url |
| 56 src_uri_str: string representation of StorageUri that was expanded. | 68 self.is_multi_source_request = is_multi_source_request |
| 57 is_multi_src_request: bool indicator whether src_uri_str expanded to more | |
| 58 than 1 BucketListingRef. | |
| 59 src_uri_expands_to_multi: bool indicator whether the current src_uri | |
| 60 expanded to more than 1 BucketListingRef. | |
| 61 names_container: Bool indicator whether src_uri names a container. | |
| 62 expanded_uri_str: string representation of StorageUri to which src_uri_str | |
| 63 expands. | |
| 64 have_existing_dst_container: bool indicator whether this is a copy | |
| 65 request to an existing bucket, bucket subdir, or directory. Default | |
| 66 None value should be used in cases where this is not needed (commands | |
| 67 other than cp). | |
| 68 is_latest: Bool indicating that the result represents the object's current | |
| 69 version. | |
| 70 """ | |
| 71 self.src_uri_str = src_uri_str | |
| 72 self.is_multi_src_request = is_multi_src_request | |
| 73 self.src_uri_expands_to_multi = src_uri_expands_to_multi | |
| 74 self.names_container = names_container | 69 self.names_container = names_container |
| 75 self.expanded_uri_str = expanded_uri_str | 70 self.expanded_storage_url = expanded_storage_url |
| 76 self.have_existing_dst_container = have_existing_dst_container | |
| 77 self.is_latest = is_latest | |
| 78 | 71 |
| 79 def __repr__(self): | 72 def __repr__(self): |
| 80 return '%s' % self.expanded_uri_str | 73 return '%s' % self._expanded_storage_url |
| 81 | |
| 82 def IsEmpty(self): | |
| 83 """Returns True if name expansion yielded no matches.""" | |
| 84 return self.expanded_blr is None | |
| 85 | |
| 86 def GetSrcUriStr(self): | |
| 87 """Returns the string representation of the StorageUri that was expanded.""" | |
| 88 return self.src_uri_str | |
| 89 | |
| 90 def IsMultiSrcRequest(self): | |
| 91 """ | |
| 92 Returns bool indicator whether name expansion resulted in more than 0 | |
| 93 BucketListingRef. | |
| 94 """ | |
| 95 return self.is_multi_src_request | |
| 96 | |
| 97 def SrcUriExpandsToMulti(self): | |
| 98 """ | |
| 99 Returns bool indicator whether the current src_uri expanded to more than | |
| 100 1 BucketListingRef | |
| 101 """ | |
| 102 return self.src_uri_expands_to_multi | |
| 103 | |
| 104 def NamesContainer(self): | |
| 105 """ | |
| 106 Returns bool indicator of whether src_uri names a directory, bucket, or | |
| 107 bucket subdir. | |
| 108 """ | |
| 109 return self.names_container | |
| 110 | |
| 111 def GetExpandedUriStr(self): | |
| 112 """ | |
| 113 Returns the string representation of StorageUri to which src_uri_str | |
| 114 expands. | |
| 115 """ | |
| 116 return self.expanded_uri_str | |
| 117 | |
| 118 def HaveExistingDstContainer(self): | |
| 119 """Returns bool indicator whether this is a copy request to an | |
| 120 existing bucket, bucket subdir, or directory, or None if not | |
| 121 relevant.""" | |
| 122 return self.have_existing_dst_container | |
| 123 | 74 |
| 124 | 75 |
| 125 class _NameExpansionIterator(object): | 76 class _NameExpansionIterator(object): |
| 126 """ | 77 """Class that iterates over all source URLs passed to the iterator. |
| 127 Iterates over all src_uris, expanding wildcards, object-less bucket names, | |
| 128 subdir bucket names, and directory names, generating a flat listing of all | |
| 129 the matching objects/files. | |
| 130 | 78 |
| 131 You should instantiate this object using the static factory function | 79 See details in __iter__ function doc. |
| 132 NameExpansionIterator, because consumers of this iterator need the | |
| 133 PluralityCheckableIterator wrapper built by that function. | |
| 134 | |
| 135 Yields: | |
| 136 gslib.name_expansion.NameExpansionResult. | |
| 137 | |
| 138 Raises: | |
| 139 CommandException: if errors encountered. | |
| 140 """ | 80 """ |
| 141 | 81 |
| 142 def __init__(self, command_name, proj_id_handler, headers, debug, logger, | 82 def __init__(self, command_name, debug, logger, gsutil_api, url_strs, |
| 143 bucket_storage_uri_class, uri_strs, recursion_requested, | 83 recursion_requested, all_versions=False, |
| 144 have_existing_dst_container=None, flat=True, | 84 cmd_supports_recursion=True, project_id=None, |
| 145 all_versions=False, for_all_version_delete=False, | 85 continue_on_error=False): |
| 146 cmd_supports_recursion=True): | 86 """Creates a NameExpansionIterator. |
| 147 """ | 87 |
| 148 Args: | 88 Args: |
| 149 command_name: name of command being run. | 89 command_name: name of command being run. |
| 150 proj_id_handler: ProjectIdHandler to use for current command. | 90 debug: Debug level to pass to underlying iterators (range 0..3). |
| 151 headers: Dictionary containing optional HTTP headers to pass to boto. | |
| 152 debug: Debug level to pass in to boto connection (range 0..3). | |
| 153 logger: logging.Logger object. | 91 logger: logging.Logger object. |
| 154 bucket_storage_uri_class: Class to instantiate for cloud StorageUris. | 92 gsutil_api: Cloud storage interface. Settable for testing/mocking. |
| 155 Settable for testing/mocking. | 93 url_strs: PluralityCheckableIterator of URL strings needing expansion. |
| 156 uri_strs: PluralityCheckableIterator of URI strings needing expansion. | 94 recursion_requested: True if -R specified on command-line. If so, |
| 157 recursion_requested: True if -R specified on command-line. | 95 listings will be flattened so mapped-to results contain objects |
| 158 have_existing_dst_container: Bool indicator whether this is a copy | 96 spanning subdirectories. |
| 159 request to an existing bucket, bucket subdir, or directory. Default | |
| 160 None value should be used in cases where this is not needed (commands | |
| 161 other than cp). | |
| 162 flat: Bool indicating whether bucket listings should be flattened, i.e., | |
| 163 so the mapped-to results contain objects spanning subdirectories. | |
| 164 all_versions: Bool indicating whether to iterate over all object versions. | 97 all_versions: Bool indicating whether to iterate over all object versions. |
| 165 for_all_version_delete: Bool indicating whether this is for an all-version | 98 cmd_supports_recursion: Bool indicating whether this command supports a |
| 166 delete. | 99 '-R' flag. Useful for printing helpful error messages. |
| 167 cmd_supports_recursion: Bool indicating whether this command supports a '-R' | 100 project_id: Project id to use for bucket retrieval. |
| 168 flag. Useful for printing helpful error messages. | 101 continue_on_error: If true, yield no-match exceptions encountered during |
| 102 iteration instead of raising them. |
| 169 | 103 |
| 170 Examples of _NameExpansionIterator with flat=True: | 104 Examples of _NameExpansionIterator with recursion_requested=True: |
| 171 - Calling with one of the uri_strs being 'gs://bucket' will enumerate all | 105 - Calling with one of the url_strs being 'gs://bucket' will enumerate all |
| 172 top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'. | 106 top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'. |
| 173 - 'gs://bucket/**' will enumerate all objects in the bucket. | 107 - 'gs://bucket/**' will enumerate all objects in the bucket. |
| 174 - 'gs://bucket/abc' will enumerate all next-level objects under directory | 108 - 'gs://bucket/abc' will enumerate either the single object abc or, if |
| 175 abc (i.e., not including subdirectories of abc) if gs://bucket/abc/* | 109 abc is a subdirectory, all objects under abc and any of its |
| 176 matches any objects; otherwise it will enumerate the single name | 110 subdirectories. |
| 177 gs://bucket/abc | |
| 178 - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its | 111 - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its |
| 179 subdirectories. | 112 subdirectories. |
| 180 - 'file:///tmp' will enumerate all files under /tmp, as will | 113 - 'file:///tmp' will enumerate all files under /tmp, as will |
| 181 'file:///tmp/*' | 114 'file:///tmp/*' |
| 182 - 'file:///tmp/**' will enumerate all files under /tmp or any of its | 115 - 'file:///tmp/**' will enumerate all files under /tmp or any of its |
| 183 subdirectories. | 116 subdirectories. |
| 184 | 117 |
| 185 Example if flat=False: calling with gs://bucket/abc/* lists matching objects | 118 Example if recursion_requested=False: |
| 186 or subdirs, but not sub-subdirs or objects beneath subdirs. | 119 calling with gs://bucket/abc/* lists matching objects |
| 120 or subdirs, but not sub-subdirs or objects beneath subdirs. |
| 187 | 121 |
| 188 Note: In step-by-step comments below we give examples assuming there's a | 122 Note: In step-by-step comments below we give examples assuming there's a |
| 189 gs://bucket with object paths: | 123 gs://bucket with object paths: |
| 190 abcd/o1.txt | 124 abcd/o1.txt |
| 191 abcd/o2.txt | 125 abcd/o2.txt |
| 192 xyz/o1.txt | 126 xyz/o1.txt |
| 193 xyz/o2.txt | 127 xyz/o2.txt |
| 194 and a directory file://dir with file paths: | 128 and a directory file://dir with file paths: |
| 195 dir/a.txt | 129 dir/a.txt |
| 196 dir/b.txt | 130 dir/b.txt |
| 197 dir/c/ | 131 dir/c/ |
| 198 """ | 132 """ |
| 199 self.command_name = command_name | 133 self.command_name = command_name |
| 200 self.proj_id_handler = proj_id_handler | |
| 201 self.headers = headers | |
| 202 self.debug = debug | 134 self.debug = debug |
| 203 self.logger = logger | 135 self.logger = logger |
| 204 self.bucket_storage_uri_class = bucket_storage_uri_class | 136 self.gsutil_api = gsutil_api |
| 205 self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class) | 137 self.url_strs = url_strs |
| 206 self.uri_strs = uri_strs | |
| 207 self.recursion_requested = recursion_requested | 138 self.recursion_requested = recursion_requested |
| 208 self.have_existing_dst_container = have_existing_dst_container | |
| 209 self.flat = flat | |
| 210 self.all_versions = all_versions | 139 self.all_versions = all_versions |
| 211 # Check self.uri_strs.has_plurality() at start because its value can change | 140 # Check self.url_strs.HasPlurality() at start because its value can change |
| 212 # if uri_strs is itself an iterator. | 141 # if url_strs is itself an iterator. |
| 213 self.uri_strs.has_plurality = self.uri_strs.has_plurality() | 142 self.url_strs.has_plurality = self.url_strs.HasPlurality() |
| 214 self.cmd_supports_recursion = cmd_supports_recursion | 143 self.cmd_supports_recursion = cmd_supports_recursion |
| 144 self.project_id = project_id |
| 145 self.continue_on_error = continue_on_error |
| 215 | 146 |
| 216 # Map holding wildcard strings to use for flat vs subdir-by-subdir listings. | 147 # Map holding wildcard strings to use for flat vs subdir-by-subdir listings. |
| 217 # (A flat listing means show all objects expanded all the way down.) | 148 # (A flat listing means show all objects expanded all the way down.) |
| 218 self._flatness_wildcard = {True: '**', False: '*'} | 149 self._flatness_wildcard = {True: '**', False: '*'} |
| 219 | 150 |
| 220 def __iter__(self): | 151 def __iter__(self): |
| 221 for uri_str in self.uri_strs: | 152 """Iterates over all source URLs passed to the iterator. |
| 153 |
| 154 For each src url, expands wildcards, object-less bucket names, |
| 155 subdir bucket names, and directory names, and generates a flat listing of |
| 156 all the matching objects/files. |
| 157 |
| 158 You should instantiate this object using the static factory function |
| 159 NameExpansionIterator, because consumers of this iterator need the |
| 160 PluralityCheckableIterator wrapper built by that function. |
| 161 |
| 162 Yields: |
| 163 gslib.name_expansion.NameExpansionResult. |
| 164 |
| 165 Raises: |
| 166 CommandException: if errors encountered. |
| 167 """ |
| 168 for url_str in self.url_strs: |
| 169 storage_url = StorageUrlFromString(url_str) |
| 170 |
| 171 if storage_url.IsFileUrl() and storage_url.IsStream(): |
| 172 if self.url_strs.has_plurality: |
| 173 raise CommandException('Multiple URL strings are not supported ' |
| 174 'with streaming ("-") URLs.') |
| 175 yield NameExpansionResult(storage_url, False, False, storage_url) |
| 176 continue |
| 177 |
| 222 # Step 1: Expand any explicitly specified wildcards. The output from this | 178 # Step 1: Expand any explicitly specified wildcards. The output from this |
| 223 # step is an iterator of BucketListingRef. | 179 # step is an iterator of BucketListingRef. |
| 224 # Starting with gs://buck*/abc* this step would expand to gs://bucket/abcd | 180 # Starting with gs://buck*/abc* this step would expand to gs://bucket/abcd |
| 225 if ContainsWildcard(uri_str): | 181 |
| 226 post_step1_iter = self._WildcardIterator(uri_str) | 182 src_names_bucket = False |
| 183 if (storage_url.IsCloudUrl() and storage_url.IsBucket() |
| 184 and not self.recursion_requested): |
| 185 # UNIX commands like rm and cp will omit directory references. |
| 186 # If url_str refers only to buckets and we are not recursing, |
| 187 # then produce references of type BUCKET, because they are guaranteed |
| 188 # to pass through Step 2 and be omitted in Step 3. |
| 189 post_step1_iter = PluralityCheckableIterator( |
| 190 self.WildcardIterator(url_str).IterBuckets( |
| 191 bucket_fields=['id'])) |
| 227 else: | 192 else: |
| 228 suri = self.suri_builder.StorageUri(uri_str) | 193 # Get a list of objects and prefixes, expanding the top level for |
| 229 post_step1_iter = iter([BucketListingRef(suri)]) | 194 # any listed buckets. If our source is a bucket, however, we need |
| 230 post_step1_iter = PluralityCheckableIterator(post_step1_iter) | 195 # to treat all of the top level expansions as names_container=True. |
| 231 | 196 post_step1_iter = PluralityCheckableIterator( |
| 232 # Step 2: Expand bucket subdirs and versions. The output from this | 197 self.WildcardIterator(url_str).IterAll( |
| 198 bucket_listing_fields=['name'], |
| 199 expand_top_level_buckets=True)) |
| 200 if storage_url.IsCloudUrl() and storage_url.IsBucket(): |
| 201 src_names_bucket = True |
| 202 |
| 203 # Step 2: Expand bucket subdirs. The output from this |
| 233 # step is an iterator of (names_container, BucketListingRef). | 204 # step is an iterator of (names_container, BucketListingRef). |
| 234 # Starting with gs://bucket/abcd this step would expand to: | 205 # Starting with gs://bucket/abcd this step would expand to: |
| 235 # iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]). | 206 # iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]). |
| 236 if self.flat and self.recursion_requested: | 207 subdir_exp_wildcard = self._flatness_wildcard[self.recursion_requested] |
| 237 post_step2_iter = _ImplicitBucketSubdirIterator(self, | 208 if self.recursion_requested: |
| 238 post_step1_iter, self.flat) | 209 post_step2_iter = _ImplicitBucketSubdirIterator( |
| 239 elif self.all_versions: | 210 self, post_step1_iter, subdir_exp_wildcard) |
| 240 post_step2_iter = _AllVersionIterator(self, post_step1_iter, | |
| 241 headers=self.headers) | |
| 242 else: | 211 else: |
| 243 post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter) | 212 post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter) |
| 244 post_step2_iter = PluralityCheckableIterator(post_step2_iter) | 213 post_step2_iter = PluralityCheckableIterator(post_step2_iter) |
| 245 | 214 |
| 246 # Step 3. Expand directories and buckets. This step yields the iterated | 215 # Because we actually perform and check object listings here, this will |
| 216 # raise if url_strs includes a non-existent object. However, |
| 217 # plurality_checkable_iterator will buffer the exception for us, not |
| 218 # raising it until the iterator is actually asked to yield the first |
| 219 # result. |
| 220 if post_step2_iter.IsEmpty(): |
| 221 if self.continue_on_error: |
| 222 try: |
| 223 raise CommandException('No URLs matched: %s' % url_str) |
| 224 except CommandException, e: |
| 225 # Yield a specialized tuple of (exception, stack_trace) to |
| 226 # the wrapping PluralityCheckableIterator. |
| 227 yield (e, sys.exc_info()[2]) |
| 228 else: |
| 229 raise CommandException('No URLs matched: %s' % url_str) |
| 230 |
| 231 # Step 3. Omit any directories, buckets, or bucket subdirectories for |
| 232 # non-recursive expansions. |
| 233 post_step3_iter = PluralityCheckableIterator(_OmitNonRecursiveIterator( |
| 234 post_step2_iter, self.recursion_requested, self.command_name, |
| 235 self.cmd_supports_recursion, self.logger)) |
| 236 |
| 237 src_url_expands_to_multi = post_step3_iter.HasPlurality() |
| 238 is_multi_source_request = (self.url_strs.has_plurality |
| 239 or src_url_expands_to_multi) |
| 240 |
| 241 # Step 4. Expand directories and buckets. This step yields the iterated |
| 247 # values. Starting with gs://bucket this step would expand to: | 242 # values. Starting with gs://bucket this step would expand to: |
| 248 # [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt] | 243 # [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt] |
| 249 # Starting with file://dir this step would expand to: | 244 # Starting with file://dir this step would expand to: |
| 250 # [dir/a.txt, dir/b.txt, dir/c/] | 245 # [dir/a.txt, dir/b.txt, dir/c/] |
| 251 exp_src_bucket_listing_refs = [] | 246 for (names_container, blr) in post_step3_iter: |
| 252 wc = self._flatness_wildcard[self.flat] | 247 src_names_container = src_names_bucket or names_container |
| 253 src_uri_expands_to_multi = (post_step1_iter.has_plurality() | 248 |
| 254 or post_step2_iter.has_plurality()) | 249 if blr.IsObject(): |
| 255 is_multi_src_request = (self.uri_strs.has_plurality | 250 yield NameExpansionResult( |
| 256 or src_uri_expands_to_multi) | 251 storage_url, is_multi_source_request, src_names_container, |
| 257 | 252 blr.storage_url) |
| 258 if post_step2_iter.is_empty(): | 253 else: |
| 259 raise CommandException('No URIs matched: %s' % uri_str) | 254 # Use implicit wildcarding to do the enumeration. |
| 260 for (names_container, blr) in post_step2_iter: | 255 # At this point we are guaranteed that: |
| 261 if (not blr.GetUri().names_container() | 256 # - Recursion has been requested because non-object entries are |
| 262 and (self.flat or not blr.HasPrefix())): | 257 # filtered in step 3 otherwise. |
| 263 yield NameExpansionResult(uri_str, is_multi_src_request, | 258 # - This is a prefix or bucket subdirectory because only |
| 264 src_uri_expands_to_multi, names_container, | 259 # non-recursive iterations produce bucket references. |
| 265 blr.GetUriString(), | 260 expanded_url = StorageUrlFromString(blr.url_string) |
| 266 self.have_existing_dst_container, | 261 if expanded_url.IsFileUrl(): |
| 267 is_latest=blr.IsLatest()) | 262 # Convert dir to implicit recursive wildcard. |
| 268 continue | 263 url_to_iterate = '%s%s%s' % (blr, os.sep, subdir_exp_wildcard) |
| 269 if not self.recursion_requested: | |
| 270 if blr.GetUri().is_file_uri(): | |
| 271 desc = 'directory' | |
| 272 elif blr.GetUri().names_bucket(): | |
| 273 desc = 'bucket' | |
| 274 else: | 264 else: |
| 275 desc = 'bucket subdir' | 265 # Convert subdir to implicit recursive wildcard. |
| 276 if self.cmd_supports_recursion: | 266 url_to_iterate = expanded_url.CreatePrefixUrl( |
| 277 self.logger.info( | 267 wildcard_suffix=subdir_exp_wildcard) |
| 278 'Omitting %s "%s". (Did you mean to do %s -R?)', | 268 |
| 279 desc, blr.GetUri(), self.command_name) | 269 wc_iter = PluralityCheckableIterator( |
| 280 else: | 270 self.WildcardIterator(url_to_iterate).IterObjects( |
| 281 self.logger.info('Omitting %s "%s".', desc, blr.GetUri()) | 271 bucket_listing_fields=['name'])) |
| 282 continue | 272 src_url_expands_to_multi = (src_url_expands_to_multi |
| 283 if blr.GetUri().is_file_uri(): | 273 or wc_iter.HasPlurality()) |
| 284 # Convert dir to implicit recursive wildcard. | 274 is_multi_source_request = (self.url_strs.has_plurality |
| 285 uri_to_iterate = '%s/%s' % (blr.GetUriString(), wc) | 275 or src_url_expands_to_multi) |
| 286 else: | 276 # This will be a flattened listing of all underlying objects in the |
| 287 # Convert bucket to implicit recursive wildcard. | 277 # subdir. |
| 288 uri_to_iterate = blr.GetUri().clone_replace_name(wc) | 278 for blr in wc_iter: |
| 289 wc_iter = PluralityCheckableIterator( | 279 yield NameExpansionResult( |
| 290 self._WildcardIterator(uri_to_iterate)) | 280 storage_url, is_multi_source_request, True, blr.storage_url) |
| 291 src_uri_expands_to_multi = (src_uri_expands_to_multi | 281 |
| 292 or wc_iter.has_plurality()) | 282 def WildcardIterator(self, url_string): |
| 293 is_multi_src_request = (self.uri_strs.has_plurality | 283 """Helper to instantiate gslib.WildcardIterator. |
| 294 or src_uri_expands_to_multi) | 284 |
| 295 for blr in wc_iter: | 285 Args are same as gslib.WildcardIterator interface, but this method fills |
| 296 yield NameExpansionResult(uri_str, is_multi_src_request, | 286 in most of the values from instance state. |
| 297 src_uri_expands_to_multi, True, | 287 |
| 298 blr.GetUriString(), | 288 Args: |
| 299 self.have_existing_dst_container, | 289 url_string: URL string naming wildcard objects to iterate. |
| 300 is_latest=blr.IsLatest()) | 290 |
| 301 | 291 Returns: |
| 302 def _WildcardIterator(self, uri_or_str): | 292 Wildcard iterator over URL string. |
| 303 """ | 293 """ |
| 304 Helper to instantiate gslib.WildcardIterator. Args are same as | 294 return gslib.wildcard_iterator.CreateWildcardIterator( |
| 305 gslib.WildcardIterator interface, but this method fills in most of the | 295 url_string, self.gsutil_api, debug=self.debug, |
| 306 values from instance state. | 296 all_versions=self.all_versions, |
| 307 | 297 project_id=self.project_id) |
| 308 Args: | 298 |
| 309 uri_or_str: StorageUri or URI string naming wildcard objects to iterate. | 299 |
| 310 """ | 300 def NameExpansionIterator(command_name, debug, logger, gsutil_api, url_strs, |
| 311 return wildcard_iterator.wildcard_iterator( | 301 recursion_requested, all_versions=False, |
| 312 uri_or_str, self.proj_id_handler, | 302 cmd_supports_recursion=True, project_id=None, |
| 313 bucket_storage_uri_class=self.bucket_storage_uri_class, | 303 continue_on_error=False): |
| 314 headers=self.headers, debug=self.debug, | 304 """Static factory function for instantiating _NameExpansionIterator. |
| 315 all_versions=self.all_versions) | 305 |
| 316 | 306 This wraps the resulting iterator in a PluralityCheckableIterator and checks |
| 317 | 307 that it is non-empty. Also, allows url_strs to be either an array or an |
| 318 def NameExpansionIterator(command_name, proj_id_handler, headers, debug, | |
| 319 logger, bucket_storage_uri_class, uri_strs, | |
| 320 recursion_requested, | |
| 321 have_existing_dst_container=None, flat=True, | |
| 322 all_versions=False, | |
| 323 for_all_version_delete=False, | |
| 324 cmd_supports_recursion=True): | |
| 325 """ | |
| 326 Static factory function for instantiating _NameExpansionIterator, which | |
| 327 wraps the resulting iterator in a PluralityCheckableIterator and checks | |
| 328 that it is non-empty. Also, allows uri_strs can be either an array or an | |
| 329 iterator. | 308 iterator. |
| 330 | 309 |
| 331 Args: | 310 Args: |
| 332 command_name: name of command being run. | 311 command_name: name of command being run. |
| 333 proj_id_handler: ProjectIdHandler to use for current command. | 312 debug: Debug level to pass to underlying iterators (range 0..3). |
| 334 headers: Dictionary containing optional HTTP headers to pass to boto. | |
| 335 debug: Debug level to pass in to boto connection (range 0..3). | |
| 336 logger: logging.Logger object. | 313 logger: logging.Logger object. |
| 337 bucket_storage_uri_class: Class to instantiate for cloud StorageUris. | 314 gsutil_api: Cloud storage interface. Settable for testing/mocking. |
| 338 Settable for testing/mocking. | 315 url_strs: Iterable URL strings needing expansion. |
| 339 uri_strs: PluralityCheckableIterator of URI strings needing expansion. | 316 recursion_requested: True if -R specified on command-line. If so, |
| 340 recursion_requested: True if -R specified on command-line. | 317 listings will be flattened so mapped-to results contain objects |
| 341 have_existing_dst_container: Bool indicator whether this is a copy | 318 spanning subdirectories. |
| 342 request to an existing bucket, bucket subdir, or directory. Default | |
| 343 None value should be used in cases where this is not needed (commands | |
| 344 other than cp). | |
| 345 flat: Bool indicating whether bucket listings should be flattened, i.e., | |
| 346 so the mapped-to results contain objects spanning subdirectories. | |
| 347 all_versions: Bool indicating whether to iterate over all object versions. | 319 all_versions: Bool indicating whether to iterate over all object versions. |
| 348 for_all_version_delete: Bool indicating whether this is for an all-version | |
| 349 delete. | |
| 350 cmd_supports_recursion: Bool indicating whether this command supports a '-R' | 320 cmd_supports_recursion: Bool indicating whether this command supports a '-R' |
| 351 flag. Useful for printing helpful error messages. | 321 flag. Useful for printing helpful error messages. |
| 352 | 322 project_id: Project id to use for the current command. |
| 353 Examples of ExpandWildcardsAndContainers with flat=True: | 323 continue_on_error: If true, yield no-match exceptions encountered during |
| 354 - Calling with one of the uri_strs being 'gs://bucket' will enumerate all | 324 iteration instead of raising them. |
| 355 top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'. | 325 |
| 356 - 'gs://bucket/**' will enumerate all objects in the bucket. | 326 Raises: |
| 357 - 'gs://bucket/abc' will enumerate all next-level objects under directory | 327 CommandException if underlying iterator is empty. |
| 358 abc (i.e., not including subdirectories of abc) if gs://bucket/abc/* | 328 |
| 359 matches any objects; otherwise it will enumerate the single name | 329 Returns: |
| 360 gs://bucket/abc | 330 Name expansion iterator instance. |
| 361 - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its | 331 |
| 362 subdirectories. | 332 For example semantics, see comments in _NameExpansionIterator.__init__. |
| 363 - 'file:///tmp' will enumerate all files under /tmp, as will | |
| 364 'file:///tmp/*' | |
| 365 - 'file:///tmp/**' will enumerate all files under /tmp or any of its | |
| 366 subdirectories. | |
| 367 | |
| 368 Example if flat=False: calling with gs://bucket/abc/* lists matching objects | |
| 369 or subdirs, but not sub-subdirs or objects beneath subdirs. | |
| 370 | |
| 371 Note: In step-by-step comments below we give examples assuming there's a | |
| 372 gs://bucket with object paths: | |
| 373 abcd/o1.txt | |
| 374 abcd/o2.txt | |
| 375 xyz/o1.txt | |
| 376 xyz/o2.txt | |
| 377 and a directory file://dir with file paths: | |
| 378 dir/a.txt | |
| 379 dir/b.txt | |
| 380 dir/c/ | |
| 381 """ | 333 """ |
| 382 uri_strs = PluralityCheckableIterator(uri_strs) | 334 url_strs = PluralityCheckableIterator(url_strs) |
| 383 name_expansion_iterator = _NameExpansionIterator( | 335 name_expansion_iterator = _NameExpansionIterator( |
| 384 command_name, proj_id_handler, headers, debug, logger, | 336 command_name, debug, logger, gsutil_api, url_strs, recursion_requested, |
| 385 bucket_storage_uri_class, uri_strs, recursion_requested, | 337 all_versions=all_versions, cmd_supports_recursion=cmd_supports_recursion, |
| 386 have_existing_dst_container, flat, all_versions=all_versions, | 338 project_id=project_id, continue_on_error=continue_on_error) |
| 387 for_all_version_delete=for_all_version_delete, | |
| 388 cmd_supports_recursion=cmd_supports_recursion) | |
| 389 name_expansion_iterator = PluralityCheckableIterator(name_expansion_iterator) | 339 name_expansion_iterator = PluralityCheckableIterator(name_expansion_iterator) |
| 390 if name_expansion_iterator.is_empty(): | 340 if name_expansion_iterator.IsEmpty(): |
| 391 raise CommandException('No URIs matched') | 341 raise CommandException('No URLs matched') |
| 392 return name_expansion_iterator | 342 return name_expansion_iterator |
| 393 | 343 |
| 394 | 344 |
class NameExpansionIteratorQueue(object):
  """Wrapper around NameExpansionIterator with Multiprocessing.Queue interface.

  Only a blocking get() function can be called, and the block and timeout
  params on that function are ignored. All other class functions raise
  NotImplementedError.

  This class is thread safe.
  """

  def __init__(self, name_expansion_iterator, final_value):
    """Wraps an iterator so that it can be drained through get().

    Args:
      name_expansion_iterator: Iterator to drain. It must provide an
          IsEmpty() method and support the iterator protocol.
      final_value: Value returned by get() once the iterator is exhausted.
    """
    self.name_expansion_iterator = name_expansion_iterator
    self.final_value = final_value
    # Lock obtained from a multiprocessing Manager; it guards every access to
    # the wrapped iterator made by get().
    self.lock = multiprocessing.Manager().Lock()

  def qsize(self):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError(
        'NameExpansionIteratorQueue.qsize() not implemented')

  def empty(self):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError(
        'NameExpansionIteratorQueue.empty() not implemented')

  def full(self):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError(
        'NameExpansionIteratorQueue.full() not implemented')

  # pylint: disable=unused-argument
  def put(self, obj=None, block=None, timeout=None):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError(
        'NameExpansionIteratorQueue.put() not implemented')

  def put_nowait(self, obj):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError(
        'NameExpansionIteratorQueue.put_nowait() not implemented')

  # pylint: disable=unused-argument
  def get(self, block=None, timeout=None):
    """Returns the next value of the wrapped iterator.

    Args:
      block: Ignored; accepted only to match the Queue.get() signature.
      timeout: Ignored; accepted only to match the Queue.get() signature.

    Returns:
      The next value produced by the wrapped iterator, or final_value once
      the iterator reports that it is empty.
    """
    # The emptiness check and the fetch happen under one lock acquisition so
    # that no other caller can consume the last element in between. The
    # "with" statement releases the lock even if the iterator raises.
    with self.lock:
      if self.name_expansion_iterator.IsEmpty():
        return self.final_value
      # The next() built-in works for both Python 2 (.next) and Python 3
      # (.__next__) style iterators.
      return next(self.name_expansion_iterator)

  def get_nowait(self):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError(
        'NameExpansionIteratorQueue.get_nowait() not implemented')

  def get_no_wait(self):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError(
        'NameExpansionIteratorQueue.get_no_wait() not implemented')

  def close(self):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError(
        'NameExpansionIteratorQueue.close() not implemented')

  def join_thread(self):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError(
        'NameExpansionIteratorQueue.join_thread() not implemented')

  def cancel_join_thread(self):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError(
        'NameExpansionIteratorQueue.cancel_join_thread() not implemented')
| 460 | 410 |
| 461 | 411 |
| 462 class _NonContainerTuplifyIterator(object): | 412 class _NonContainerTuplifyIterator(object): |
| 463 """ | 413 """Iterator that produces the tuple (False, blr) for each iterated value. |
| 464 Iterator that produces the tuple (False, blr) for each iteration | 414 |
| 465 of blr_iter. Used for cases where blr_iter iterates over a set of | 415 Used for cases where blr_iter iterates over a set of |
| 466 BucketListingRefs known not to name containers. | 416 BucketListingRefs known not to name containers. |
| 467 """ | 417 """ |
| 468 | 418 |
| 469 def __init__(self, blr_iter): | 419 def __init__(self, blr_iter): |
| 470 """ | 420 """Instantiates iterator. |
| 421 |
| 471 Args: | 422 Args: |
| 472 blr_iter: iterator of BucketListingRef. | 423 blr_iter: iterator of BucketListingRef. |
| 473 """ | 424 """ |
| 474 self.blr_iter = blr_iter | 425 self.blr_iter = blr_iter |
| 475 | 426 |
| 476 def __iter__(self): | 427 def __iter__(self): |
| 477 for blr in self.blr_iter: | 428 for blr in self.blr_iter: |
| 478 yield (False, blr) | 429 yield (False, blr) |
| 479 | 430 |
| 480 | 431 |
| 432 class _OmitNonRecursiveIterator(object): |
| 433 """Iterator wrapper for that omits certain values for non-recursive requests. |
| 434 |
| 435 This iterates over tuples of (names_container, BucketListingReference) and |
| 436 omits directories, prefixes, and buckets from non-recurisve requests |
| 437 so that we can properly calculate whether the source URL expands to multiple |
| 438 URLs. |
| 439 |
| 440 For example, if we have a bucket containing two objects: bucket/foo and |
| 441 bucket/foo/bar and we do a non-recursive iteration, only bucket/foo will be |
| 442 yielded. |
| 443 """ |
| 444 |
| 445 def __init__(self, tuple_iter, recursion_requested, command_name, |
| 446 cmd_supports_recursion, logger): |
| 447 """Instanties the iterator. |
| 448 |
| 449 Args: |
| 450 tuple_iter: Iterator over names_container, BucketListingReference |
| 451 from step 2 in the NameExpansionIterator |
| 452 recursion_requested: If false, omit buckets, dirs, and subdirs |
| 453 command_name: Command name for user messages |
| 454 cmd_supports_recursion: Command recursion support for user messages |
| 455 logger: Log object for user messages |
| 456 """ |
| 457 self.tuple_iter = tuple_iter |
| 458 self.recursion_requested = recursion_requested |
| 459 self.command_name = command_name |
| 460 self.cmd_supports_recursion = cmd_supports_recursion |
| 461 self.logger = logger |
| 462 |
| 463 def __iter__(self): |
| 464 for (names_container, blr) in self.tuple_iter: |
| 465 if not self.recursion_requested and not blr.IsObject(): |
| 466 # At this point we either have a bucket or a prefix, |
| 467 # so if recursion is not requested, we're going to omit it. |
| 468 expanded_url = StorageUrlFromString(blr.url_string) |
| 469 if expanded_url.IsFileUrl(): |
| 470 desc = 'directory' |
| 471 else: |
| 472 desc = blr.type_name |
| 473 if self.cmd_supports_recursion: |
| 474 self.logger.info( |
| 475 'Omitting %s "%s". (Did you mean to do %s -R?)', |
| 476 desc, blr.url_string, self.command_name) |
| 477 else: |
| 478 self.logger.info('Omitting %s "%s".', desc, blr.url_string) |
| 479 else: |
| 480 yield (names_container, blr) |
| 481 |
| 482 |
| 481 class _ImplicitBucketSubdirIterator(object): | 483 class _ImplicitBucketSubdirIterator(object): |
| 482 | 484 """Iterator wrapper that performs implicit bucket subdir expansion. |
| 483 """ | |
| 484 Iterator wrapper that iterates over blr_iter, performing implicit bucket | |
| 485 subdir expansion. | |
| 486 | 485 |
| 487 Each iteration yields tuple (names_container, expanded BucketListingRefs) | 486 Each iteration yields tuple (names_container, expanded BucketListingRefs) |
| 488 where names_container is true if URI names a directory, bucket, | 487 where names_container is true if URL names a directory, bucket, |
| 489 or bucket subdir (vs how StorageUri.names_container() doesn't | 488 or bucket subdir. |
| 490 handle latter case). | |
| 491 | 489 |
| 492 For example, iterating over [BucketListingRef("gs://abc")] would expand to: | 490 For example, iterating over [BucketListingRef("gs://abc")] would expand to: |
| 493 [BucketListingRef("gs://abc/o1"), BucketListingRef("gs://abc/o2")] | 491 [BucketListingRef("gs://abc/o1"), BucketListingRef("gs://abc/o2")] |
| 494 if those subdir objects exist, and [BucketListingRef("gs://abc") otherwise. | 492 if those subdir objects exist, and [BucketListingRef("gs://abc") otherwise. |
| 495 """ | 493 """ |
| 496 | 494 |
| 497 def __init__(self, name_expansion_instance, blr_iter, flat): | 495 def __init__(self, name_exp_instance, blr_iter, subdir_exp_wildcard): |
| 498 """ | 496 """Instantiates the iterator. |
| 497 |
| 499 Args: | 498 Args: |
| 500 name_expansion_instance: calling instance of NameExpansion class. | 499 name_exp_instance: calling instance of NameExpansion class. |
| 501 blr_iter: iterator of BucketListingRef. | 500 blr_iter: iterator over BucketListingRef prefixes and objects. |
| 502 flat: bool indicating whether bucket listings should be flattened, i.e., | 501 subdir_exp_wildcard: wildcard for expanding subdirectories; |
| 503 so the mapped-to results contain objects spanning subdirectories. | 502 expected values are ** if the mapped-to results should contain |
| 503 objects spanning subdirectories, or * if only one level should |
| 504 be listed. |
| 504 """ | 505 """ |
| 505 self.blr_iter = blr_iter | 506 self.blr_iter = blr_iter |
| 506 self.name_expansion_instance = name_expansion_instance | 507 self.name_exp_instance = name_exp_instance |
| 507 self.flat = flat | 508 self.subdir_exp_wildcard = subdir_exp_wildcard |
| 508 | 509 |
| 509 def __iter__(self): | 510 def __iter__(self): |
| 510 for blr in self.blr_iter: | 511 for blr in self.blr_iter: |
| 511 uri = blr.GetUri() | 512 if blr.IsPrefix(): |
| 512 if uri.names_object(): | 513 # This is a bucket subdirectory, list objects according to the wildcard. |
| 513 # URI could be a bucket subdir. | 514 prefix_url = StorageUrlFromString(blr.url_string).CreatePrefixUrl( |
| 515 wildcard_suffix=self.subdir_exp_wildcard) |
| 514 implicit_subdir_iterator = PluralityCheckableIterator( | 516 implicit_subdir_iterator = PluralityCheckableIterator( |
| 515 self.name_expansion_instance._WildcardIterator( | 517 self.name_exp_instance.WildcardIterator( |
| 516 self.name_expansion_instance.suri_builder.StorageUri( | 518 prefix_url).IterAll(bucket_listing_fields=['name'])) |
| 517 '%s/%s' % (uri.uri.rstrip('/'), | 519 if not implicit_subdir_iterator.IsEmpty(): |
| 518 self.name_expansion_instance._flatness_wildcard[ | |
| 519 self.flat])))) | |
| 520 if not implicit_subdir_iterator.is_empty(): | |
| 521 for exp_blr in implicit_subdir_iterator: | 520 for exp_blr in implicit_subdir_iterator: |
| 522 yield (True, exp_blr) | 521 yield (True, exp_blr) |
| 523 else: | 522 else: |
| 523 # Prefix that contains no objects, for example in the $folder$ case |
| 524 # or an empty filesystem directory. |
| 524 yield (False, blr) | 525 yield (False, blr) |
| 526 elif blr.IsObject(): |
| 527 yield (False, blr) |
| 525 else: | 528 else: |
| 526 yield (False, blr) | 529 raise CommandException( |
| 527 | 530 '_ImplicitBucketSubdirIterator got a bucket reference %s' % blr) |
| 528 class _AllVersionIterator(object): | |
| 529 """ | |
| 530 Iterator wrapper that iterates over blr_iter, performing implicit version | |
| 531 expansion. | |
| 532 | |
| 533 Output behavior is identical to that in _ImplicitBucketSubdirIterator above. | |
| 534 | |
| 535 For example, iterating over [BucketListingRef("gs://abc/o1")] would expand to: | |
| 536 [BucketListingRef("gs://abc/o1#1234"), BucketListingRef("gs://abc/o1#1235")] | |
| 537 """ | |
| 538 | |
| 539 def __init__(self, name_expansion_instance, blr_iter, headers=None): | |
| 540 """ | |
| 541 Args: | |
| 542 name_expansion_instance: calling instance of NameExpansion class. | |
| 543 blr_iter: iterator of BucketListingRef. | |
| 544 flat: bool indicating whether bucket listings should be flattened, i.e., | |
| 545 so the mapped-to results contain objects spanning subdirectories. | |
| 546 """ | |
| 547 self.blr_iter = blr_iter | |
| 548 self.name_expansion_instance = name_expansion_instance | |
| 549 self.headers = headers | |
| 550 | |
| 551 def __iter__(self): | |
| 552 empty = True | |
| 553 for blr in self.blr_iter: | |
| 554 uri = blr.GetUri() | |
| 555 if not uri.names_object(): | |
| 556 empty = False | |
| 557 yield (True, blr) | |
| 558 break | |
| 559 for key in uri.list_bucket( | |
| 560 prefix=uri.object_name, headers=self.headers, all_versions=True): | |
| 561 if key.name != uri.object_name: | |
| 562 # The desired entries will be alphabetically first in this listing. | |
| 563 break | |
| 564 version_blr = BucketListingRef(uri.clone_replace_key(key), key=key) | |
| 565 empty = False | |
| 566 yield (False, version_blr) | |
| 567 # If no version exists, yield the unversioned blr, and let the consuming | |
| 568 # operation fail. This mirrors behavior in _ImplicitBucketSubdirIterator. | |
| 569 if empty: | |
| 570 yield (False, blr) | |
| 571 | |
| OLD | NEW |