Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(214)

Side by Side Diff: gslib/name_expansion.py

Issue 698893003: Update checked in version of gsutil to version 4.6 (Closed) Base URL: http://dart.googlecode.com/svn/third_party/gsutil/
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « gslib/ls_helper.py ('k') | gslib/no_op_auth_plugin.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 # -*- coding: utf-8 -*-
1 # Copyright 2012 Google Inc. All Rights Reserved. 2 # Copyright 2012 Google Inc. All Rights Reserved.
2 # 3 #
3 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License. 5 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at 6 # You may obtain a copy of the License at
6 # 7 #
7 # http://www.apache.org/licenses/LICENSE-2.0 8 # http://www.apache.org/licenses/LICENSE-2.0
8 # 9 #
9 # Unless required by applicable law or agreed to in writing, software 10 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS, 11 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and 13 # See the License for the specific language governing permissions and
13 # limitations under the License. 14 # limitations under the License.
15 """Name expansion iterator and result classes.
14 16
15 import copy
16 import multiprocessing
17 import wildcard_iterator
18
19 from bucket_listing_ref import BucketListingRef
20 from gslib.exception import CommandException
21 from gslib.plurality_checkable_iterator import PluralityCheckableIterator
22 from gslib.storage_uri_builder import StorageUriBuilder
23 from wildcard_iterator import ContainsWildcard
24
25 """
26 Name expansion support for the various ways gsutil lets users refer to 17 Name expansion support for the various ways gsutil lets users refer to
27 collections of data (via explicit wildcarding as well as directory, 18 collections of data (via explicit wildcarding as well as directory,
28 bucket, and bucket subdir implicit wildcarding). This class encapsulates 19 bucket, and bucket subdir implicit wildcarding). This class encapsulates
29 the various rules for determining how these expansions are done. 20 the various rules for determining how these expansions are done.
30 """ 21 """
31 22
23 # Disable warnings for NameExpansionIteratorQueue functions; they implement
24 # an interface which does not follow lint guidelines.
25 # pylint: disable=invalid-name
26
27 from __future__ import absolute_import
28
29 import multiprocessing
30 import os
31 import sys
32
33 from gslib.exception import CommandException
34 from gslib.plurality_checkable_iterator import PluralityCheckableIterator
35 import gslib.wildcard_iterator
36 from gslib.wildcard_iterator import StorageUrlFromString
37
32 38
33 class NameExpansionResult(object): 39 class NameExpansionResult(object):
34 """ 40 """Holds one fully expanded result from iterating over NameExpansionIterator.
35 Holds one fully expanded result from iterating over NameExpansionIterator.
36 41
37 The member data in this class need to be pickleable because 42 The member data in this class need to be pickleable because
38 NameExpansionResult instances are passed through Multiprocessing.Queue. In 43 NameExpansionResult instances are passed through Multiprocessing.Queue. In
39 particular, don't include any boto state like StorageUri, since that pulls 44 particular, don't include any boto state like StorageUri, since that pulls
40 in a big tree of objects, some of which aren't pickleable (and even if 45 in a big tree of objects, some of which aren't pickleable (and even if
41 they were, pickling/unpickling such a large object tree would result in 46 they were, pickling/unpickling such a large object tree would result in
42 significant overhead). 47 significant overhead).
43 48
44 The state held in this object is needed for handling the various naming cases 49 The state held in this object is needed for handling the various naming cases
45 (e.g., copying from a single source URI to a directory generates different 50 (e.g., copying from a single source URL to a directory generates different
46 dest URI names than copying multiple URIs to a directory, to be consistent 51 dest URL names than copying multiple URLs to a directory, to be consistent
47 with naming rules used by the Unix cp command). For more details see comments 52 with naming rules used by the Unix cp command). For more details see comments
48 in _NameExpansionIterator. 53 in _NameExpansionIterator.
49 """ 54 """
50 55
51 def __init__(self, src_uri_str, is_multi_src_request, 56 def __init__(self, source_storage_url, is_multi_source_request,
52 src_uri_expands_to_multi, names_container, expanded_uri_str, 57 names_container, expanded_storage_url):
53 have_existing_dst_container=None, is_latest=False): 58 """Instantiates a result from name expansion.
59
60 Args:
61 source_storage_url: StorageUrl that was being expanded.
62 is_multi_source_request: bool indicator whether multiple source URLs were
63 given or the source URL expanded to more than one BucketListingRef.
64 names_container: Bool indicator whether src_url names a container.
65 expanded_storage_url: StorageUrl that was expanded.
54 """ 66 """
55 Args: 67 self.source_storage_url = source_storage_url
56 src_uri_str: string representation of StorageUri that was expanded. 68 self.is_multi_source_request = is_multi_source_request
57 is_multi_src_request: bool indicator whether src_uri_str expanded to more
58 than 1 BucketListingRef.
59 src_uri_expands_to_multi: bool indicator whether the current src_uri
60 expanded to more than 1 BucketListingRef.
61 names_container: Bool indicator whether src_uri names a container.
62 expanded_uri_str: string representation of StorageUri to which src_uri_str
63 expands.
64 have_existing_dst_container: bool indicator whether this is a copy
65 request to an existing bucket, bucket subdir, or directory. Default
66 None value should be used in cases where this is not needed (commands
67 other than cp).
68 is_latest: Bool indicating that the result represents the object's current
69 version.
70 """
71 self.src_uri_str = src_uri_str
72 self.is_multi_src_request = is_multi_src_request
73 self.src_uri_expands_to_multi = src_uri_expands_to_multi
74 self.names_container = names_container 69 self.names_container = names_container
75 self.expanded_uri_str = expanded_uri_str 70 self.expanded_storage_url = expanded_storage_url
76 self.have_existing_dst_container = have_existing_dst_container
77 self.is_latest = is_latest
78 71
79 def __repr__(self): 72 def __repr__(self):
80 return '%s' % self.expanded_uri_str 73 return '%s' % self._expanded_storage_url
81
82 def IsEmpty(self):
83 """Returns True if name expansion yielded no matches."""
84 return self.expanded_blr is None
85
86 def GetSrcUriStr(self):
87 """Returns the string representation of the StorageUri that was expanded."""
88 return self.src_uri_str
89
90 def IsMultiSrcRequest(self):
91 """
92 Returns bool indicator whether name expansion resulted in more than 0
93 BucketListingRef.
94 """
95 return self.is_multi_src_request
96
97 def SrcUriExpandsToMulti(self):
98 """
99 Returns bool indicator whether the current src_uri expanded to more than
100 1 BucketListingRef
101 """
102 return self.src_uri_expands_to_multi
103
104 def NamesContainer(self):
105 """
106 Returns bool indicator of whether src_uri names a directory, bucket, or
107 bucket subdir.
108 """
109 return self.names_container
110
111 def GetExpandedUriStr(self):
112 """
113 Returns the string representation of StorageUri to which src_uri_str
114 expands.
115 """
116 return self.expanded_uri_str
117
118 def HaveExistingDstContainer(self):
119 """Returns bool indicator whether this is a copy request to an
120 existing bucket, bucket subdir, or directory, or None if not
121 relevant."""
122 return self.have_existing_dst_container
123 74
124 75
125 class _NameExpansionIterator(object): 76 class _NameExpansionIterator(object):
126 """ 77 """Class that iterates over all source URLs passed to the iterator.
127 Iterates over all src_uris, expanding wildcards, object-less bucket names,
128 subdir bucket names, and directory names, generating a flat listing of all
129 the matching objects/files.
130 78
131 You should instantiate this object using the static factory function 79 See details in __iter__ function doc.
132 NameExpansionIterator, because consumers of this iterator need the
133 PluralityCheckableIterator wrapper built by that function.
134
135 Yields:
136 gslib.name_expansion.NameExpansionResult.
137
138 Raises:
139 CommandException: if errors encountered.
140 """ 80 """
141 81
142 def __init__(self, command_name, proj_id_handler, headers, debug, logger, 82 def __init__(self, command_name, debug, logger, gsutil_api, url_strs,
143 bucket_storage_uri_class, uri_strs, recursion_requested, 83 recursion_requested, all_versions=False,
144 have_existing_dst_container=None, flat=True, 84 cmd_supports_recursion=True, project_id=None,
145 all_versions=False, for_all_version_delete=False, 85 continue_on_error=False):
146 cmd_supports_recursion=True): 86 """Creates a NameExpansionIterator.
147 """ 87
148 Args: 88 Args:
149 command_name: name of command being run. 89 command_name: name of command being run.
150 proj_id_handler: ProjectIdHandler to use for current command. 90 debug: Debug level to pass to underlying iterators (range 0..3).
151 headers: Dictionary containing optional HTTP headers to pass to boto.
152 debug: Debug level to pass in to boto connection (range 0..3).
153 logger: logging.Logger object. 91 logger: logging.Logger object.
154 bucket_storage_uri_class: Class to instantiate for cloud StorageUris. 92 gsutil_api: Cloud storage interface. Settable for testing/mocking.
155 Settable for testing/mocking. 93 url_strs: PluralityCheckableIterator of URL strings needing expansion.
156 uri_strs: PluralityCheckableIterator of URI strings needing expansion. 94 recursion_requested: True if -R specified on command-line. If so,
157 recursion_requested: True if -R specified on command-line. 95 listings will be flattened so mapped-to results contain objects
158 have_existing_dst_container: Bool indicator whether this is a copy 96 spanning subdirectories.
159 request to an existing bucket, bucket subdir, or directory. Default
160 None value should be used in cases where this is not needed (commands
161 other than cp).
162 flat: Bool indicating whether bucket listings should be flattened, i.e.,
163 so the mapped-to results contain objects spanning subdirectories.
164 all_versions: Bool indicating whether to iterate over all object versions. 97 all_versions: Bool indicating whether to iterate over all object versions.
165 for_all_version_delete: Bool indicating whether this is for an all-version 98 cmd_supports_recursion: Bool indicating whether this command supports a
166 delete. 99 '-R' flag. Useful for printing helpful error messages.
167 cmd_supports_recursion: Bool indicating whether this command supports a '-R' 100 project_id: Project id to use for bucket retrieval.
168 flag. Useful for printing helpful error messages. 101 continue_on_error: If true, yield no-match exceptions encountered during
102 iteration instead of raising them.
169 103
170 Examples of _NameExpansionIterator with flat=True: 104 Examples of _NameExpansionIterator with recursion_requested=True:
171 - Calling with one of the uri_strs being 'gs://bucket' will enumerate all 105 - Calling with one of the url_strs being 'gs://bucket' will enumerate all
172 top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'. 106 top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'.
173 - 'gs://bucket/**' will enumerate all objects in the bucket. 107 - 'gs://bucket/**' will enumerate all objects in the bucket.
174 - 'gs://bucket/abc' will enumerate all next-level objects under directory 108 - 'gs://bucket/abc' will enumerate either the single object abc or, if
175 abc (i.e., not including subdirectories of abc) if gs://bucket/abc/* 109 abc is a subdirectory, all objects under abc and any of its
176 matches any objects; otherwise it will enumerate the single name 110 subdirectories.
177 gs://bucket/abc
178 - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its 111 - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its
179 subdirectories. 112 subdirectories.
180 - 'file:///tmp' will enumerate all files under /tmp, as will 113 - 'file:///tmp' will enumerate all files under /tmp, as will
181 'file:///tmp/*' 114 'file:///tmp/*'
182 - 'file:///tmp/**' will enumerate all files under /tmp or any of its 115 - 'file:///tmp/**' will enumerate all files under /tmp or any of its
183 subdirectories. 116 subdirectories.
184 117
185 Example if flat=False: calling with gs://bucket/abc/* lists matching objects 118 Example if recursion_requested=False:
186 or subdirs, but not sub-subdirs or objects beneath subdirs. 119 calling with gs://bucket/abc/* lists matching objects
120 or subdirs, but not sub-subdirs or objects beneath subdirs.
187 121
188 Note: In step-by-step comments below we give examples assuming there's a 122 Note: In step-by-step comments below we give examples assuming there's a
189 gs://bucket with object paths: 123 gs://bucket with object paths:
190 abcd/o1.txt 124 abcd/o1.txt
191 abcd/o2.txt 125 abcd/o2.txt
192 xyz/o1.txt 126 xyz/o1.txt
193 xyz/o2.txt 127 xyz/o2.txt
194 and a directory file://dir with file paths: 128 and a directory file://dir with file paths:
195 dir/a.txt 129 dir/a.txt
196 dir/b.txt 130 dir/b.txt
197 dir/c/ 131 dir/c/
198 """ 132 """
199 self.command_name = command_name 133 self.command_name = command_name
200 self.proj_id_handler = proj_id_handler
201 self.headers = headers
202 self.debug = debug 134 self.debug = debug
203 self.logger = logger 135 self.logger = logger
204 self.bucket_storage_uri_class = bucket_storage_uri_class 136 self.gsutil_api = gsutil_api
205 self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class) 137 self.url_strs = url_strs
206 self.uri_strs = uri_strs
207 self.recursion_requested = recursion_requested 138 self.recursion_requested = recursion_requested
208 self.have_existing_dst_container = have_existing_dst_container
209 self.flat = flat
210 self.all_versions = all_versions 139 self.all_versions = all_versions
211 # Check self.uri_strs.has_plurality() at start because its value can change 140 # Check self.url_strs.HasPlurality() at start because its value can change
212 # if uri_strs is itself an iterator. 141 # if url_strs is itself an iterator.
213 self.uri_strs.has_plurality = self.uri_strs.has_plurality() 142 self.url_strs.has_plurality = self.url_strs.HasPlurality()
214 self.cmd_supports_recursion = cmd_supports_recursion 143 self.cmd_supports_recursion = cmd_supports_recursion
144 self.project_id = project_id
145 self.continue_on_error = continue_on_error
215 146
216 # Map holding wildcard strings to use for flat vs subdir-by-subdir listings. 147 # Map holding wildcard strings to use for flat vs subdir-by-subdir listings.
217 # (A flat listing means show all objects expanded all the way down.) 148 # (A flat listing means show all objects expanded all the way down.)
218 self._flatness_wildcard = {True: '**', False: '*'} 149 self._flatness_wildcard = {True: '**', False: '*'}
219 150
220 def __iter__(self): 151 def __iter__(self):
221 for uri_str in self.uri_strs: 152 """Iterates over all source URLs passed to the iterator.
153
154 For each src url, expands wildcards, object-less bucket names,
155 subdir bucket names, and directory names, and generates a flat listing of
156 all the matching objects/files.
157
158 You should instantiate this object using the static factory function
159 NameExpansionIterator, because consumers of this iterator need the
160 PluralityCheckableIterator wrapper built by that function.
161
162 Yields:
163 gslib.name_expansion.NameExpansionResult.
164
165 Raises:
166 CommandException: if errors encountered.
167 """
168 for url_str in self.url_strs:
169 storage_url = StorageUrlFromString(url_str)
170
171 if storage_url.IsFileUrl() and storage_url.IsStream():
172 if self.url_strs.has_plurality:
173 raise CommandException('Multiple URL strings are not supported '
174 'with streaming ("-") URLs.')
175 yield NameExpansionResult(storage_url, False, False, storage_url)
176 continue
177
222 # Step 1: Expand any explicitly specified wildcards. The output from this 178 # Step 1: Expand any explicitly specified wildcards. The output from this
223 # step is an iterator of BucketListingRef. 179 # step is an iterator of BucketListingRef.
224 # Starting with gs://buck*/abc* this step would expand to gs://bucket/abcd 180 # Starting with gs://buck*/abc* this step would expand to gs://bucket/abcd
225 if ContainsWildcard(uri_str): 181
226 post_step1_iter = self._WildcardIterator(uri_str) 182 src_names_bucket = False
183 if (storage_url.IsCloudUrl() and storage_url.IsBucket()
184 and not self.recursion_requested):
185 # UNIX commands like rm and cp will omit directory references.
186 # If url_str refers only to buckets and we are not recursing,
187 # then produce references of type BUCKET, because they are guaranteed
188 # to pass through Step 2 and be omitted in Step 3.
189 post_step1_iter = PluralityCheckableIterator(
190 self.WildcardIterator(url_str).IterBuckets(
191 bucket_fields=['id']))
227 else: 192 else:
228 suri = self.suri_builder.StorageUri(uri_str) 193 # Get a list of objects and prefixes, expanding the top level for
229 post_step1_iter = iter([BucketListingRef(suri)]) 194 # any listed buckets. If our source is a bucket, however, we need
230 post_step1_iter = PluralityCheckableIterator(post_step1_iter) 195 # to treat all of the top level expansions as names_container=True.
231 196 post_step1_iter = PluralityCheckableIterator(
232 # Step 2: Expand bucket subdirs and versions. The output from this 197 self.WildcardIterator(url_str).IterAll(
198 bucket_listing_fields=['name'],
199 expand_top_level_buckets=True))
200 if storage_url.IsCloudUrl() and storage_url.IsBucket():
201 src_names_bucket = True
202
203 # Step 2: Expand bucket subdirs. The output from this
233 # step is an iterator of (names_container, BucketListingRef). 204 # step is an iterator of (names_container, BucketListingRef).
234 # Starting with gs://bucket/abcd this step would expand to: 205 # Starting with gs://bucket/abcd this step would expand to:
235 # iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]). 206 # iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]).
236 if self.flat and self.recursion_requested: 207 subdir_exp_wildcard = self._flatness_wildcard[self.recursion_requested]
237 post_step2_iter = _ImplicitBucketSubdirIterator(self, 208 if self.recursion_requested:
238 post_step1_iter, self.flat) 209 post_step2_iter = _ImplicitBucketSubdirIterator(
239 elif self.all_versions: 210 self, post_step1_iter, subdir_exp_wildcard)
240 post_step2_iter = _AllVersionIterator(self, post_step1_iter,
241 headers=self.headers)
242 else: 211 else:
243 post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter) 212 post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter)
244 post_step2_iter = PluralityCheckableIterator(post_step2_iter) 213 post_step2_iter = PluralityCheckableIterator(post_step2_iter)
245 214
246 # Step 3. Expand directories and buckets. This step yields the iterated 215 # Because we actually perform and check object listings here, this will
216 # raise if url_strs includes a non-existent object. However,
217 # plurality_checkable_iterator will buffer the exception for us, not
218 # raising it until the iterator is actually asked to yield the first
219 # result.
220 if post_step2_iter.IsEmpty():
221 if self.continue_on_error:
222 try:
223 raise CommandException('No URLs matched: %s' % url_str)
224 except CommandException, e:
225 # Yield a specialized tuple of (exception, stack_trace) to
226 # the wrapping PluralityCheckableIterator.
227 yield (e, sys.exc_info()[2])
228 else:
229 raise CommandException('No URLs matched: %s' % url_str)
230
231 # Step 3. Omit any directories, buckets, or bucket subdirectories for
232 # non-recursive expansions.
233 post_step3_iter = PluralityCheckableIterator(_OmitNonRecursiveIterator(
234 post_step2_iter, self.recursion_requested, self.command_name,
235 self.cmd_supports_recursion, self.logger))
236
237 src_url_expands_to_multi = post_step3_iter.HasPlurality()
238 is_multi_source_request = (self.url_strs.has_plurality
239 or src_url_expands_to_multi)
240
241 # Step 4. Expand directories and buckets. This step yields the iterated
247 # values. Starting with gs://bucket this step would expand to: 242 # values. Starting with gs://bucket this step would expand to:
248 # [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt] 243 # [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
249 # Starting with file://dir this step would expand to: 244 # Starting with file://dir this step would expand to:
250 # [dir/a.txt, dir/b.txt, dir/c/] 245 # [dir/a.txt, dir/b.txt, dir/c/]
251 exp_src_bucket_listing_refs = [] 246 for (names_container, blr) in post_step3_iter:
252 wc = self._flatness_wildcard[self.flat] 247 src_names_container = src_names_bucket or names_container
253 src_uri_expands_to_multi = (post_step1_iter.has_plurality() 248
254 or post_step2_iter.has_plurality()) 249 if blr.IsObject():
255 is_multi_src_request = (self.uri_strs.has_plurality 250 yield NameExpansionResult(
256 or src_uri_expands_to_multi) 251 storage_url, is_multi_source_request, src_names_container,
257 252 blr.storage_url)
258 if post_step2_iter.is_empty(): 253 else:
259 raise CommandException('No URIs matched: %s' % uri_str) 254 # Use implicit wildcarding to do the enumeration.
260 for (names_container, blr) in post_step2_iter: 255 # At this point we are guaranteed that:
261 if (not blr.GetUri().names_container() 256 # - Recursion has been requested because non-object entries are
262 and (self.flat or not blr.HasPrefix())): 257 # filtered in step 3 otherwise.
263 yield NameExpansionResult(uri_str, is_multi_src_request, 258 # - This is a prefix or bucket subdirectory because only
264 src_uri_expands_to_multi, names_container, 259 # non-recursive iterations produce bucket references.
265 blr.GetUriString(), 260 expanded_url = StorageUrlFromString(blr.url_string)
266 self.have_existing_dst_container, 261 if expanded_url.IsFileUrl():
267 is_latest=blr.IsLatest()) 262 # Convert dir to implicit recursive wildcard.
268 continue 263 url_to_iterate = '%s%s%s' % (blr, os.sep, subdir_exp_wildcard)
269 if not self.recursion_requested:
270 if blr.GetUri().is_file_uri():
271 desc = 'directory'
272 elif blr.GetUri().names_bucket():
273 desc = 'bucket'
274 else: 264 else:
275 desc = 'bucket subdir' 265 # Convert subdir to implicit recursive wildcard.
276 if self.cmd_supports_recursion: 266 url_to_iterate = expanded_url.CreatePrefixUrl(
277 self.logger.info( 267 wildcard_suffix=subdir_exp_wildcard)
278 'Omitting %s "%s". (Did you mean to do %s -R?)', 268
279 desc, blr.GetUri(), self.command_name) 269 wc_iter = PluralityCheckableIterator(
280 else: 270 self.WildcardIterator(url_to_iterate).IterObjects(
281 self.logger.info('Omitting %s "%s".', desc, blr.GetUri()) 271 bucket_listing_fields=['name']))
282 continue 272 src_url_expands_to_multi = (src_url_expands_to_multi
283 if blr.GetUri().is_file_uri(): 273 or wc_iter.HasPlurality())
284 # Convert dir to implicit recursive wildcard. 274 is_multi_source_request = (self.url_strs.has_plurality
285 uri_to_iterate = '%s/%s' % (blr.GetUriString(), wc) 275 or src_url_expands_to_multi)
286 else: 276 # This will be a flattened listing of all underlying objects in the
287 # Convert bucket to implicit recursive wildcard. 277 # subdir.
288 uri_to_iterate = blr.GetUri().clone_replace_name(wc) 278 for blr in wc_iter:
289 wc_iter = PluralityCheckableIterator( 279 yield NameExpansionResult(
290 self._WildcardIterator(uri_to_iterate)) 280 storage_url, is_multi_source_request, True, blr.storage_url)
291 src_uri_expands_to_multi = (src_uri_expands_to_multi 281
292 or wc_iter.has_plurality()) 282 def WildcardIterator(self, url_string):
293 is_multi_src_request = (self.uri_strs.has_plurality 283 """Helper to instantiate gslib.WildcardIterator.
294 or src_uri_expands_to_multi) 284
295 for blr in wc_iter: 285 Args are same as gslib.WildcardIterator interface, but this method fills
296 yield NameExpansionResult(uri_str, is_multi_src_request, 286 in most of the values from instance state.
297 src_uri_expands_to_multi, True, 287
298 blr.GetUriString(), 288 Args:
299 self.have_existing_dst_container, 289 url_string: URL string naming wildcard objects to iterate.
300 is_latest=blr.IsLatest()) 290
301 291 Returns:
302 def _WildcardIterator(self, uri_or_str): 292 Wildcard iterator over URL string.
303 """ 293 """
304 Helper to instantiate gslib.WildcardIterator. Args are same as 294 return gslib.wildcard_iterator.CreateWildcardIterator(
305 gslib.WildcardIterator interface, but this method fills in most of the 295 url_string, self.gsutil_api, debug=self.debug,
306 values from instance state. 296 all_versions=self.all_versions,
307 297 project_id=self.project_id)
308 Args: 298
309 uri_or_str: StorageUri or URI string naming wildcard objects to iterate. 299
310 """ 300 def NameExpansionIterator(command_name, debug, logger, gsutil_api, url_strs,
311 return wildcard_iterator.wildcard_iterator( 301 recursion_requested, all_versions=False,
312 uri_or_str, self.proj_id_handler, 302 cmd_supports_recursion=True, project_id=None,
313 bucket_storage_uri_class=self.bucket_storage_uri_class, 303 continue_on_error=False):
314 headers=self.headers, debug=self.debug, 304 """Static factory function for instantiating _NameExpansionIterator.
315 all_versions=self.all_versions) 305
316 306 This wraps the resulting iterator in a PluralityCheckableIterator and checks
317 307 that it is non-empty. Also, allows url_strs to be either an array or an
318 def NameExpansionIterator(command_name, proj_id_handler, headers, debug,
319 logger, bucket_storage_uri_class, uri_strs,
320 recursion_requested,
321 have_existing_dst_container=None, flat=True,
322 all_versions=False,
323 for_all_version_delete=False,
324 cmd_supports_recursion=True):
325 """
326 Static factory function for instantiating _NameExpansionIterator, which
327 wraps the resulting iterator in a PluralityCheckableIterator and checks
328 that it is non-empty. Also, allows uri_strs can be either an array or an
329 iterator. 308 iterator.
330 309
331 Args: 310 Args:
332 command_name: name of command being run. 311 command_name: name of command being run.
333 proj_id_handler: ProjectIdHandler to use for current command. 312 debug: Debug level to pass to underlying iterators (range 0..3).
334 headers: Dictionary containing optional HTTP headers to pass to boto.
335 debug: Debug level to pass in to boto connection (range 0..3).
336 logger: logging.Logger object. 313 logger: logging.Logger object.
337 bucket_storage_uri_class: Class to instantiate for cloud StorageUris. 314 gsutil_api: Cloud storage interface. Settable for testing/mocking.
338 Settable for testing/mocking. 315 url_strs: Iterable URL strings needing expansion.
339 uri_strs: PluralityCheckableIterator of URI strings needing expansion. 316 recursion_requested: True if -R specified on command-line. If so,
340 recursion_requested: True if -R specified on command-line. 317 listings will be flattened so mapped-to results contain objects
341 have_existing_dst_container: Bool indicator whether this is a copy 318 spanning subdirectories.
342 request to an existing bucket, bucket subdir, or directory. Default
343 None value should be used in cases where this is not needed (commands
344 other than cp).
345 flat: Bool indicating whether bucket listings should be flattened, i.e.,
346 so the mapped-to results contain objects spanning subdirectories.
347 all_versions: Bool indicating whether to iterate over all object versions. 319 all_versions: Bool indicating whether to iterate over all object versions.
348 for_all_version_delete: Bool indicating whether this is for an all-version
349 delete.
350 cmd_supports_recursion: Bool indicating whether this command supports a '-R' 320 cmd_supports_recursion: Bool indicating whether this command supports a '-R'
351 flag. Useful for printing helpful error messages. 321 flag. Useful for printing helpful error messages.
352 322 project_id: Project id to use for the current command.
353 Examples of ExpandWildcardsAndContainers with flat=True: 323 continue_on_error: If true, yield no-match exceptions encountered during
354 - Calling with one of the uri_strs being 'gs://bucket' will enumerate all 324 iteration instead of raising them.
355 top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'. 325
356 - 'gs://bucket/**' will enumerate all objects in the bucket. 326 Raises:
357 - 'gs://bucket/abc' will enumerate all next-level objects under directory 327 CommandException if underlying iterator is empty.
358 abc (i.e., not including subdirectories of abc) if gs://bucket/abc/* 328
359 matches any objects; otherwise it will enumerate the single name 329 Returns:
360 gs://bucket/abc 330 Name expansion iterator instance.
361 - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its 331
362 subdirectories. 332 For example semantics, see comments in NameExpansionIterator.__init__.
363 - 'file:///tmp' will enumerate all files under /tmp, as will
364 'file:///tmp/*'
365 - 'file:///tmp/**' will enumerate all files under /tmp or any of its
366 subdirectories.
367
368 Example if flat=False: calling with gs://bucket/abc/* lists matching objects
369 or subdirs, but not sub-subdirs or objects beneath subdirs.
370
371 Note: In step-by-step comments below we give examples assuming there's a
372 gs://bucket with object paths:
373 abcd/o1.txt
374 abcd/o2.txt
375 xyz/o1.txt
376 xyz/o2.txt
377 and a directory file://dir with file paths:
378 dir/a.txt
379 dir/b.txt
380 dir/c/
381 """ 333 """
382 uri_strs = PluralityCheckableIterator(uri_strs) 334 url_strs = PluralityCheckableIterator(url_strs)
383 name_expansion_iterator = _NameExpansionIterator( 335 name_expansion_iterator = _NameExpansionIterator(
384 command_name, proj_id_handler, headers, debug, logger, 336 command_name, debug, logger, gsutil_api, url_strs, recursion_requested,
385 bucket_storage_uri_class, uri_strs, recursion_requested, 337 all_versions=all_versions, cmd_supports_recursion=cmd_supports_recursion,
386 have_existing_dst_container, flat, all_versions=all_versions, 338 project_id=project_id, continue_on_error=continue_on_error)
387 for_all_version_delete=for_all_version_delete,
388 cmd_supports_recursion=cmd_supports_recursion)
389 name_expansion_iterator = PluralityCheckableIterator(name_expansion_iterator) 339 name_expansion_iterator = PluralityCheckableIterator(name_expansion_iterator)
390 if name_expansion_iterator.is_empty(): 340 if name_expansion_iterator.IsEmpty():
391 raise CommandException('No URIs matched') 341 raise CommandException('No URLs matched')
392 return name_expansion_iterator 342 return name_expansion_iterator
393 343
394 344
395 class NameExpansionIteratorQueue(object): 345 class NameExpansionIteratorQueue(object):
396 """ 346 """Wrapper around NameExpansionIterator with Multiprocessing.Queue interface.
397 Wrapper around NameExpansionIterator that provides a Multiprocessing.Queue
398 facade.
399 347
400 Only a blocking get() function can be called, and the block and timeout 348 Only a blocking get() function can be called, and the block and timeout
401 params on that function are ignored. All other class functions raise 349 params on that function are ignored. All other class functions raise
402 NotImplementedError. 350 NotImplementedError.
403 351
404 This class is thread safe. 352 This class is thread safe.
405 """ 353 """
406 354
407 def __init__(self, name_expansion_iterator, final_value): 355 def __init__(self, name_expansion_iterator, final_value):
408 self.name_expansion_iterator = name_expansion_iterator 356 self.name_expansion_iterator = name_expansion_iterator
409 self.final_value = final_value 357 self.final_value = final_value
410 self.lock = multiprocessing.Manager().Lock() 358 self.lock = multiprocessing.Manager().Lock()
411 359
412 def qsize(self): 360 def qsize(self):
413 raise NotImplementedError( 361 raise NotImplementedError(
414 "NameExpansionIteratorQueue.qsize() not implemented") 362 'NameExpansionIteratorQueue.qsize() not implemented')
415 363
416 def empty(self): 364 def empty(self):
417 raise NotImplementedError( 365 raise NotImplementedError(
418 "NameExpansionIteratorQueue.empty() not implemented") 366 'NameExpansionIteratorQueue.empty() not implemented')
419 367
420 def full(self): 368 def full(self):
421 raise NotImplementedError( 369 raise NotImplementedError(
422 "NameExpansionIteratorQueue.full() not implemented") 370 'NameExpansionIteratorQueue.full() not implemented')
423 371
372 # pylint: disable=unused-argument
424 def put(self, obj=None, block=None, timeout=None): 373 def put(self, obj=None, block=None, timeout=None):
425 raise NotImplementedError( 374 raise NotImplementedError(
426 "NameExpansionIteratorQueue.put() not implemented") 375 'NameExpansionIteratorQueue.put() not implemented')
427 376
428 def put_nowait(self, obj): 377 def put_nowait(self, obj):
429 raise NotImplementedError( 378 raise NotImplementedError(
430 "NameExpansionIteratorQueue.put_nowait() not implemented") 379 'NameExpansionIteratorQueue.put_nowait() not implemented')
431 380
381 # pylint: disable=unused-argument
432 def get(self, block=None, timeout=None): 382 def get(self, block=None, timeout=None):
433 self.lock.acquire() 383 self.lock.acquire()
434 try: 384 try:
435 if self.name_expansion_iterator.is_empty(): 385 if self.name_expansion_iterator.IsEmpty():
436 return self.final_value 386 return self.final_value
437 return self.name_expansion_iterator.next() 387 return self.name_expansion_iterator.next()
438 finally: 388 finally:
439 self.lock.release() 389 self.lock.release()
440 390
441 def get_nowait(self): 391 def get_nowait(self):
442 raise NotImplementedError( 392 raise NotImplementedError(
443 "NameExpansionIteratorQueue.get_nowait() not implemented") 393 'NameExpansionIteratorQueue.get_nowait() not implemented')
444 394
445 def get_no_wait(self): 395 def get_no_wait(self):
446 raise NotImplementedError( 396 raise NotImplementedError(
447 "NameExpansionIteratorQueue.get_no_wait() not implemented") 397 'NameExpansionIteratorQueue.get_no_wait() not implemented')
448 398
449 def close(self): 399 def close(self):
450 raise NotImplementedError( 400 raise NotImplementedError(
451 "NameExpansionIteratorQueue.close() not implemented") 401 'NameExpansionIteratorQueue.close() not implemented')
452 402
453 def join_thread(self): 403 def join_thread(self):
454 raise NotImplementedError( 404 raise NotImplementedError(
455 "NameExpansionIteratorQueue.join_thread() not implemented") 405 'NameExpansionIteratorQueue.join_thread() not implemented')
456 406
457 def cancel_join_thread(self): 407 def cancel_join_thread(self):
458 raise NotImplementedError( 408 raise NotImplementedError(
459 "NameExpansionIteratorQueue.cancel_join_thread() not implemented") 409 'NameExpansionIteratorQueue.cancel_join_thread() not implemented')
460 410
461 411
462 class _NonContainerTuplifyIterator(object): 412 class _NonContainerTuplifyIterator(object):
463 """ 413 """Iterator that produces the tuple (False, blr) for each iterated value.
464 Iterator that produces the tuple (False, blr) for each iteration 414
465 of blr_iter. Used for cases where blr_iter iterates over a set of 415 Used for cases where blr_iter iterates over a set of
466 BucketListingRefs known not to name containers. 416 BucketListingRefs known not to name containers.
467 """ 417 """
468 418
469 def __init__(self, blr_iter): 419 def __init__(self, blr_iter):
470 """ 420 """Instantiates iterator.
421
471 Args: 422 Args:
472 blr_iter: iterator of BucketListingRef. 423 blr_iter: iterator of BucketListingRef.
473 """ 424 """
474 self.blr_iter = blr_iter 425 self.blr_iter = blr_iter
475 426
476 def __iter__(self): 427 def __iter__(self):
477 for blr in self.blr_iter: 428 for blr in self.blr_iter:
478 yield (False, blr) 429 yield (False, blr)
479 430
480 431
432 class _OmitNonRecursiveIterator(object):
433 """Iterator wrapper that omits certain values for non-recursive requests.
434
435 This iterates over tuples of (names_container, BucketListingReference) and
436 omits directories, prefixes, and buckets from non-recursive requests
437 so that we can properly calculate whether the source URL expands to multiple
438 URLs.
439
440 For example, if we have a bucket containing two objects: bucket/foo and
441 bucket/foo/bar and we do a non-recursive iteration, only bucket/foo will be
442 yielded.
443 """
444
445 def __init__(self, tuple_iter, recursion_requested, command_name,
446 cmd_supports_recursion, logger):
447 """Instantiates the iterator.
448
449 Args:
450 tuple_iter: Iterator over names_container, BucketListingReference
451 from step 2 in the NameExpansionIterator
452 recursion_requested: If false, omit buckets, dirs, and subdirs
453 command_name: Command name for user messages
454 cmd_supports_recursion: Command recursion support for user messages
455 logger: Log object for user messages
456 """
457 self.tuple_iter = tuple_iter
458 self.recursion_requested = recursion_requested
459 self.command_name = command_name
460 self.cmd_supports_recursion = cmd_supports_recursion
461 self.logger = logger
462
463 def __iter__(self):
464 for (names_container, blr) in self.tuple_iter:
465 if not self.recursion_requested and not blr.IsObject():
466 # At this point we either have a bucket or a prefix,
467 # so if recursion is not requested, we're going to omit it.
468 expanded_url = StorageUrlFromString(blr.url_string)
469 if expanded_url.IsFileUrl():
470 desc = 'directory'
471 else:
472 desc = blr.type_name
473 if self.cmd_supports_recursion:
474 self.logger.info(
475 'Omitting %s "%s". (Did you mean to do %s -R?)',
476 desc, blr.url_string, self.command_name)
477 else:
478 self.logger.info('Omitting %s "%s".', desc, blr.url_string)
479 else:
480 yield (names_container, blr)
481
482
481 class _ImplicitBucketSubdirIterator(object): 483 class _ImplicitBucketSubdirIterator(object):
482 484 """Iterator wrapper that performs implicit bucket subdir expansion.
483 """
484 Iterator wrapper that iterates over blr_iter, performing implicit bucket
485 subdir expansion.
486 485
487 Each iteration yields tuple (names_container, expanded BucketListingRefs) 486 Each iteration yields tuple (names_container, expanded BucketListingRefs)
488 where names_container is true if URI names a directory, bucket, 487 where names_container is true if URL names a directory, bucket,
489 or bucket subdir (vs how StorageUri.names_container() doesn't 488 or bucket subdir.
490 handle latter case).
491 489
492 For example, iterating over [BucketListingRef("gs://abc")] would expand to: 490 For example, iterating over [BucketListingRef("gs://abc")] would expand to:
493 [BucketListingRef("gs://abc/o1"), BucketListingRef("gs://abc/o2")] 491 [BucketListingRef("gs://abc/o1"), BucketListingRef("gs://abc/o2")]
494 if those subdir objects exist, and [BucketListingRef("gs://abc")] otherwise. 492 if those subdir objects exist, and [BucketListingRef("gs://abc")] otherwise.
495 """ 493 """
496 494
497 def __init__(self, name_expansion_instance, blr_iter, flat): 495 def __init__(self, name_exp_instance, blr_iter, subdir_exp_wildcard):
498 """ 496 """Instantiates the iterator.
497
499 Args: 498 Args:
500 name_expansion_instance: calling instance of NameExpansion class. 499 name_exp_instance: calling instance of NameExpansion class.
501 blr_iter: iterator of BucketListingRef. 500 blr_iter: iterator over BucketListingRef prefixes and objects.
502 flat: bool indicating whether bucket listings should be flattened, i.e., 501 subdir_exp_wildcard: wildcard for expanding subdirectories;
503 so the mapped-to results contain objects spanning subdirectories. 502 expected values are ** if the mapped-to results should contain
503 objects spanning subdirectories, or * if only one level should
504 be listed.
504 """ 505 """
505 self.blr_iter = blr_iter 506 self.blr_iter = blr_iter
506 self.name_expansion_instance = name_expansion_instance 507 self.name_exp_instance = name_exp_instance
507 self.flat = flat 508 self.subdir_exp_wildcard = subdir_exp_wildcard
508 509
509 def __iter__(self): 510 def __iter__(self):
510 for blr in self.blr_iter: 511 for blr in self.blr_iter:
511 uri = blr.GetUri() 512 if blr.IsPrefix():
512 if uri.names_object(): 513 # This is a bucket subdirectory, list objects according to the wildcard.
513 # URI could be a bucket subdir. 514 prefix_url = StorageUrlFromString(blr.url_string).CreatePrefixUrl(
515 wildcard_suffix=self.subdir_exp_wildcard)
514 implicit_subdir_iterator = PluralityCheckableIterator( 516 implicit_subdir_iterator = PluralityCheckableIterator(
515 self.name_expansion_instance._WildcardIterator( 517 self.name_exp_instance.WildcardIterator(
516 self.name_expansion_instance.suri_builder.StorageUri( 518 prefix_url).IterAll(bucket_listing_fields=['name']))
517 '%s/%s' % (uri.uri.rstrip('/'), 519 if not implicit_subdir_iterator.IsEmpty():
518 self.name_expansion_instance._flatness_wildcard[
519 self.flat]))))
520 if not implicit_subdir_iterator.is_empty():
521 for exp_blr in implicit_subdir_iterator: 520 for exp_blr in implicit_subdir_iterator:
522 yield (True, exp_blr) 521 yield (True, exp_blr)
523 else: 522 else:
523 # Prefix that contains no objects, for example in the $folder$ case
524 # or an empty filesystem directory.
524 yield (False, blr) 525 yield (False, blr)
526 elif blr.IsObject():
527 yield (False, blr)
525 else: 528 else:
526 yield (False, blr) 529 raise CommandException(
527 530 '_ImplicitBucketSubdirIterator got a bucket reference %s' % blr)
528 class _AllVersionIterator(object):
529 """
530 Iterator wrapper that iterates over blr_iter, performing implicit version
531 expansion.
532
533 Output behavior is identical to that in _ImplicitBucketSubdirIterator above.
534
535 For example, iterating over [BucketListingRef("gs://abc/o1")] would expand to:
536 [BucketListingRef("gs://abc/o1#1234"), BucketListingRef("gs://abc/o1#1235")]
537 """
538
539 def __init__(self, name_expansion_instance, blr_iter, headers=None):
540 """
541 Args:
542 name_expansion_instance: calling instance of NameExpansion class.
543 blr_iter: iterator of BucketListingRef.
544 flat: bool indicating whether bucket listings should be flattened, i.e.,
545 so the mapped-to results contain objects spanning subdirectories.
546 """
547 self.blr_iter = blr_iter
548 self.name_expansion_instance = name_expansion_instance
549 self.headers = headers
550
551 def __iter__(self):
552 empty = True
553 for blr in self.blr_iter:
554 uri = blr.GetUri()
555 if not uri.names_object():
556 empty = False
557 yield (True, blr)
558 break
559 for key in uri.list_bucket(
560 prefix=uri.object_name, headers=self.headers, all_versions=True):
561 if key.name != uri.object_name:
562 # The desired entries will be alphabetically first in this listing.
563 break
564 version_blr = BucketListingRef(uri.clone_replace_key(key), key=key)
565 empty = False
566 yield (False, version_blr)
567 # If no version exists, yield the unversioned blr, and let the consuming
568 # operation fail. This mirrors behavior in _ImplicitBucketSubdirIterator.
569 if empty:
570 yield (False, blr)
571
OLDNEW
« no previous file with comments | « gslib/ls_helper.py ('k') | gslib/no_op_auth_plugin.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698