| OLD | NEW | 
|---|
|  | (Empty) | 
| 1 # Copyright 2010 Google Inc. All Rights Reserved. |  | 
| 2 # |  | 
| 3 # Permission is hereby granted, free of charge, to any person obtaining a |  | 
| 4 # copy of this software and associated documentation files (the |  | 
| 5 # "Software"), to deal in the Software without restriction, including |  | 
| 6 # without limitation the rights to use, copy, modify, merge, publish, dis- |  | 
| 7 # tribute, sublicense, and/or sell copies of the Software, and to permit |  | 
| 8 # persons to whom the Software is furnished to do so, subject to the fol- |  | 
| 9 # lowing conditions: |  | 
| 10 # |  | 
| 11 # The above copyright notice and this permission notice shall be included |  | 
| 12 # in all copies or substantial portions of the Software. |  | 
| 13 # |  | 
| 14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |  | 
| 15 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- |  | 
| 16 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT |  | 
| 17 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |  | 
| 18 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |  | 
| 19 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |  | 
| 20 # IN THE SOFTWARE. |  | 
| 21 |  | 
| 22 """Implementation of wildcarding over StorageUris. |  | 
| 23 |  | 
| 24 StorageUri is an abstraction that Google introduced in the boto library, |  | 
| 25 for representing storage provider-independent bucket and object names with |  | 
| 26 a shorthand URI-like syntax (see boto/boto/storage_uri.py). The current |  | 
| 27 class provides wildcarding support for StorageUri objects (including both |  | 
| 28 bucket and file system objects), allowing one to express collections of |  | 
| 29 objects with syntax like the following: |  | 
| 30   gs://mybucket/images/*.png |  | 
| 31   file:///tmp/???abc??? |  | 
| 32 |  | 
| 33 We provide wildcarding support as part of gsutil rather than as part |  | 
| 34 of boto because wildcarding is really part of shell command-like |  | 
| 35 functionality. |  | 
| 36 |  | 
| 37 A comment about wildcard semantics: We support both single path component |  | 
| 38 wildcards (e.g., using '*') and recursive wildcards (using '**'), for both |  | 
| 39 file and cloud URIs. For example, |  | 
| 40   gs://bucket/doc/*/*.html |  | 
| 41 would enumerate HTML files one directory down from gs://bucket/doc, while |  | 
| 42   gs://bucket/**/*.html |  | 
| 43 would enumerate HTML files at any depth within the bucket. |  | 
| 44 |  | 
| 45 Note also that if you use file system wildcards it's likely your shell |  | 
| 46 interprets the wildcarding before passing the command to gsutil. For example: |  | 
| 47   % gsutil cp /opt/eclipse/*/*.html gs://bucket/eclipse |  | 
| 48 would likely be expanded by the shell into the following before running gsutil: |  | 
| 49   % gsutil cp /opt/eclipse/RUNNING.html gs://bucket/eclipse |  | 
| 50 |  | 
| 51 Note also that most shells don't support '**' wildcarding (I think only |  | 
| 52 zsh does). If you want to use '**' wildcarding with such a shell you can |  | 
| 53 single quote each wildcarded string, so it gets passed uninterpreted by the |  | 
| 54 shell to gsutil (at which point gsutil will perform the wildcarding expansion): |  | 
| 55   % gsutil cp '/opt/eclipse/**/*.html' gs://bucket/eclipse |  | 
| 56 """ |  | 
| 57 |  | 
| 58 import boto |  | 
| 59 import fnmatch |  | 
| 60 import glob |  | 
| 61 import os |  | 
| 62 import re |  | 
| 63 import sys |  | 
| 64 import urllib |  | 
| 65 |  | 
| 66 from boto.s3.prefix import Prefix |  | 
| 67 from boto.storage_uri import BucketStorageUri |  | 
| 68 from bucket_listing_ref import BucketListingRef |  | 
| 69 |  | 
# Regex to determine if a string contains any wildcards. Written as a raw
# string so the bracket escapes reach the regex engine verbatim rather than
# relying on Python passing unrecognized string escapes through unchanged.
WILDCARD_REGEX = re.compile(r'[*?\[\]]')

# Identifiers passed as the first argument to
# proj_id_handler.FillInProjectHeaderIfNeeded() to indicate which kind of
# wildcard iteration (object-level or bucket-level) is requesting the header.
WILDCARD_OBJECT_ITERATOR = 'wildcard_object_iterator'
WILDCARD_BUCKET_ITERATOR = 'wildcard_bucket_iterator'
| 75 |  | 
| 76 |  | 
class WildcardIterator(object):
  """Abstract parent of the iterators that expand wildcarded StorageUris.

  Subclasses supply the actual iteration and are expected to store the URI
  being expanded in self.wildcard_uri. Rather than instantiating a subclass
  directly, use the module-level wildcard_iterator() factory function, which
  picks the implementation that fits the StorageUri.
  """

  def __repr__(self):
    """Returns 'WildcardIterator(<uri>)' for the URI being iterated."""
    description = 'WildcardIterator(%s)' % self.wildcard_uri
    return description
| 91 |  | 
| 92 |  | 
class CloudWildcardIterator(WildcardIterator):
  """WildcardIterator subclass for buckets and objects.

  Iterates over BucketListingRef matching the StorageUri wildcard. It's
  much more efficient to request the Key from the BucketListingRef (via
  GetKey()) than to request the StorageUri and then call uri.get_key()
  to retrieve the key, for cases where you want to get metadata that's
  available in the Bucket (for example to get the name and size of
  each object), because that information is available in the bucket GET
  results. If you were to iterate over URIs for such cases and then get
  the name and size info from each resulting StorageUri, it would cause
  an additional object GET request for each of the result URIs.
  """

  def __init__(self, wildcard_uri, proj_id_handler,
               bucket_storage_uri_class=BucketStorageUri, all_versions=False,
               headers=None, debug=0):
    """Instantiates an iterator over BucketListingRef matching wildcard URI.

    Args:
      wildcard_uri: StorageUri that contains the wildcard to iterate.
      proj_id_handler: ProjectIdHandler to use for current command.
      bucket_storage_uri_class: BucketStorageUri interface.
                                Settable for testing/mocking.
      all_versions: Passed through to list_bucket(). Also selects whether
                    yielded object refs wrap the key-derived URI as-is (True)
                    or a version-less clone rebuilt from the object name
                    (False).
      headers: Dictionary containing optional HTTP headers to pass to boto.
      debug: Debug level to pass in to boto connection (range 0..3).
    """
    self.wildcard_uri = wildcard_uri
    # Make a copy of the headers so any updates we make during wildcard
    # expansion aren't left in the input params (specifically, so we don't
    # include the x-goog-project-id header needed by a subset of cases, in
    # the data returned to caller, which could then be used in other cases
    # where that header must not be passed).
    self.headers = {} if headers is None else headers.copy()
    self.proj_id_handler = proj_id_handler
    self.bucket_storage_uri_class = bucket_storage_uri_class
    self.all_versions = all_versions
    self.debug = debug

  def __iter__(self):
    """Python iterator that gets called when iterating over cloud wildcard.

    Bucket-name wildcards are expanded first (by listing all buckets and
    filtering by name); object-name wildcards are then expanded one path
    component at a time within each matching bucket.

    Yields:
      BucketListingRef, or empty iterator if no matches.
    """
    # First handle bucket wildcarding, if any.
    if ContainsWildcard(self.wildcard_uri.bucket_name):
      prog = re.compile(fnmatch.translate(self.wildcard_uri.bucket_name))
      bucket_uris = []
      self.proj_id_handler.FillInProjectHeaderIfNeeded(
          WILDCARD_BUCKET_ITERATOR, self.wildcard_uri, self.headers)
      for b in self.wildcard_uri.get_all_buckets(headers=self.headers):
        if prog.match(b.name):
          # Use str(b.name) because get_all_buckets() returns Unicode
          # string, which when used to construct x-goog-copy-src metadata
          # requests for object-to-object copies causes pathname '/' chars
          # to be entity-encoded (bucket%2Fdir instead of bucket/dir),
          # which causes the request to fail.
          uri_str = '%s://%s' % (self.wildcard_uri.scheme,
                                 urllib.quote_plus(str(b.name)))
          bucket_uris.append(
              boto.storage_uri(
                  uri_str, debug=self.debug,
                  bucket_storage_uri_class=self.bucket_storage_uri_class,
                  suppress_consec_slashes=False))
    else:
      bucket_uris = [self.wildcard_uri.clone_replace_name('')]

    # Now iterate over bucket(s), and handle object wildcarding, if any.
    self.proj_id_handler.FillInProjectHeaderIfNeeded(
        WILDCARD_OBJECT_ITERATOR, self.wildcard_uri, self.headers)
    for bucket_uri in bucket_uris:
      if self.wildcard_uri.names_bucket():
        # Bucket-only URI.
        yield BucketListingRef(bucket_uri, key=None, prefix=None,
                               headers=self.headers)
      elif not ContainsWildcard(self.wildcard_uri.object_name):
        # URI contains an object name without any wildcard, so just yield
        # the needed URI.
        uri_to_yield = bucket_uri.clone_replace_name(
            self.wildcard_uri.object_name)
        yield BucketListingRef(uri_to_yield, key=None, prefix=None,
                               headers=self.headers)
      else:
        # URI contains a wildcard. Expand iteratively by building
        # prefix/delimiter bucket listing request, filtering the results per
        # the current level's wildcard, and continuing with the next component
        # of the wildcard. See _BuildBucketFilterStrings() documentation
        # for details.
        #
        # Initialize the iteration with bucket name from bucket_uri but
        # object name from self.wildcard_uri. This is needed to handle cases
        # where both the bucket and object names contain wildcards.
        uris_needing_expansion = [
            bucket_uri.clone_replace_name(self.wildcard_uri.object_name)]
        while uris_needing_expansion:
          uri = uris_needing_expansion.pop(0)
          (prefix, delimiter, prefix_wildcard, suffix_wildcard) = (
              self._BuildBucketFilterStrings(uri.object_name))
          prog = re.compile(fnmatch.translate(prefix_wildcard))
          # List bucket for objects matching prefix up to delimiter.
          for key in bucket_uri.list_bucket(prefix=prefix,
                                            delimiter=delimiter,
                                            headers=self.headers,
                                            all_versions=self.all_versions):
            # Check that the prefix regex matches rstripped key.name (to
            # correspond with the rstripped prefix_wildcard from
            # _BuildBucketFilterStrings()).
            stripped_name = key.name.rstrip('/')
            if not prog.match(stripped_name):
              continue
            if suffix_wildcard and stripped_name != suffix_wildcard:
              if isinstance(key, Prefix):
                # There's more wildcard left to expand.
                uris_needing_expansion.append(
                    uri.clone_replace_name(
                        stripped_name + '/' + suffix_wildcard))
            else:
              # Done expanding.
              expanded_uri = uri.clone_replace_key(key)

              if isinstance(key, Prefix):
                yield BucketListingRef(expanded_uri, key=None, prefix=key,
                                       headers=self.headers)
              elif self.all_versions:
                yield BucketListingRef(expanded_uri, key=key, prefix=None,
                                       headers=self.headers)
              else:
                # Yield BLR wrapping version-less URI.
                yield BucketListingRef(
                    expanded_uri.clone_replace_name(expanded_uri.object_name),
                    key=key, prefix=None, headers=self.headers)

  def _BuildBucketFilterStrings(self, wildcard):
    """Builds strings for querying a bucket and filtering the results.

    These strings implement wildcard object name matching one path component
    at a time.

    Args:
      wildcard: The object-name wildcard string to match to objects. It may
                also contain no wildcard characters at all (see below).

    Returns:
      (prefix, delimiter, prefix_wildcard, suffix_wildcard)
      where:
        prefix is the prefix to be sent in bucket GET request (None if the
          wildcard starts with a wildcard character).
        delimiter is the delimiter to be sent in bucket GET request (None
          for recursive '**' wildcards).
        prefix_wildcard is the wildcard to be used to filter bucket GET results.
        suffix_wildcard is wildcard to be appended to filtered bucket GET
          results for next wildcard expansion iteration.
      For example, given the object-name wildcard abc/d*e/f*.txt we
      would build prefix=abc/d, delimiter=/, prefix_wildcard=abc/d*e, and
      suffix_wildcard=f*.txt. Using this prefix and delimiter for a bucket
      listing request will then produce a listing result set that can be
      filtered using this prefix_wildcard; and we'd use this suffix_wildcard
      to feed into the next call(s) to _BuildBucketFilterStrings(), for the
      next iteration of listing/filtering.

      If the input has no wildcard characters, the result is
      (wildcard, '/', wildcard, ''), i.e., a listing that matches the input
      literally.
    """
    # Generate a request prefix if the object name part of the wildcard starts
    # with a non-wildcard string (e.g., that's true for 'gs://bucket/abc*xyz').
    match = WILDCARD_REGEX.search(wildcard)
    if not match:
      # Input "wildcard" has no wildcard chars, so just return tuple that will
      # cause a bucket listing to match the given input wildcard. Example: if
      # previous iteration yielded gs://bucket/dir/ with suffix_wildcard abc,
      # the next iteration will call _BuildBucketFilterStrings() with
      # gs://bucket/dir/abc, and we will return prefix ='dir/abc',
      # delimiter='/', prefix_wildcard='dir/abc', and suffix_wildcard=''.
      prefix = wildcard
      delimiter = '/'
      prefix_wildcard = wildcard
      suffix_wildcard = ''
    else:
      if match.start() > 0:
        # Wildcard does not occur at beginning of object name, so construct a
        # prefix string to send to server.
        prefix = wildcard[:match.start()]
        wildcard_part = wildcard[match.start():]
      else:
        prefix = None
        wildcard_part = wildcard
      end = wildcard_part.find('/')
      if end != -1:
        wildcard_part = wildcard_part[:end+1]
      # Remove trailing '/' so we will match gs://bucket/abc* as well as
      # gs://bucket/abc*/ with the same wildcard regex.
      prefix_wildcard = ((prefix or '') + wildcard_part).rstrip('/')
      suffix_wildcard = wildcard[match.end():]
      end = suffix_wildcard.find('/')
      if end == -1:
        suffix_wildcard = ''
      else:
        suffix_wildcard = suffix_wildcard[end+1:]
      # To implement recursive (**) wildcarding, if prefix_wildcard contains
      # '**' don't send a delimiter, and append suffix_wildcard to the end of
      # prefix_wildcard so the whole remainder is matched in one listing.
      if '**' in prefix_wildcard:
        delimiter = None
        prefix_wildcard = prefix_wildcard + suffix_wildcard
        suffix_wildcard = ''
      else:
        delimiter = '/'
    # The following debug output is useful for tracing how the algorithm
    # walks through a multi-part wildcard like gs://bucket/abc/d*e/f*.txt
    if self.debug > 1:
      sys.stderr.write(
          'DEBUG: wildcard=%s, prefix=%s, delimiter=%s, '
          'prefix_wildcard=%s, suffix_wildcard=%s\n' %
          (wildcard, prefix, delimiter, prefix_wildcard, suffix_wildcard))
    return (prefix, delimiter, prefix_wildcard, suffix_wildcard)

  def IterKeys(self):
    """Convenience iterator that yields the Key of each iterated result.

    BucketListingRefs for which HasKey() is false are skipped.

    Yields:
      Subclass of boto.s3.key.Key, or empty iterator if no matches.
    """
    for bucket_listing_ref in self:
      if bucket_listing_ref.HasKey():
        yield bucket_listing_ref.GetKey()

  def IterUris(self):
    """Convenience iterator that yields the StorageUri of each result.

    Yields:
      StorageUri, or empty iterator if no matches.
    """
    for bucket_listing_ref in self:
      yield bucket_listing_ref.GetUri()

  def IterUrisForKeys(self):
    """Convenience iterator that yields the StorageUri of each keyed result.

    Only BucketListingRefs for which HasKey() is true contribute a URI.

    Yields:
      StorageUri, or empty iterator if no matches.
    """
    for bucket_listing_ref in self:
      if bucket_listing_ref.HasKey():
        yield bucket_listing_ref.GetUri()
| 351 |  | 
| 352 |  | 
class FileWildcardIterator(WildcardIterator):
  """WildcardIterator subclass for files and directories.

  If you use recursive wildcards ('**') only a single such wildcard is
  supported. For example you could use the wildcard '**/*.txt' to list all .txt
  files in any subdirectory of the current directory, but you couldn't use a
  wildcard like '**/abc/**/*.txt' (which would, if supported, let you find .txt
  files in any subdirectory named 'abc').
  """

  def __init__(self, wildcard_uri, headers=None, debug=0):
    """Instantiates an iterator over BucketListingRefs matching wildcard URI.

    Args:
      wildcard_uri: StorageUri that contains the wildcard to iterate.
      headers: Dictionary containing optional HTTP headers to pass to boto.
      debug: Debug level to pass in to boto connection (range 0..3).
    """
    self.wildcard_uri = wildcard_uri
    self.headers = headers
    self.debug = debug

  def __iter__(self):
    """Python iterator that gets called when iterating over file wildcard.

    Yields:
      BucketListingRef for each matching file path, or empty iterator if no
      matches.

    Raises:
      WildcardException: if the wildcard has more than 2 consecutive '*'s.
    """
    wildcard = self.wildcard_uri.object_name
    match = re.search(r'\*\*', wildcard)
    if match:
      # Recursive wildcarding request ('.../**/...').
      # Example input: wildcard = '/tmp/tmp2pQJAX/**/*'
      dir_prefix = wildcard[:match.start()]
      if not dir_prefix:
        # Wildcard starts with '**' (e.g., '**/*.txt'): walk from the current
        # directory. Slicing with match.start()-1 here would produce a
        # negative index and a nonsensical base directory.
        base_dir = os.curdir
      elif dir_prefix == os.sep:
        # '/**...': walk from the filesystem root rather than from ''.
        base_dir = os.sep
      else:
        # Drop the path separator that precedes the '**'.
        base_dir = dir_prefix[:-1]
      remaining_wildcard = wildcard[match.end():]
      # At this point for the above example base_dir = '/tmp/tmp2pQJAX' and
      # remaining_wildcard = '/*'
      if remaining_wildcard.startswith('*'):
        raise WildcardException('Invalid wildcard with more than 2 consecutive '
                                '*s (%s)' % wildcard)
      # If there was no remaining wildcard past the recursive wildcard,
      # treat it as if it were a '*'. For example, file://tmp/** is equivalent
      # to file://tmp/**/*
      if not remaining_wildcard:
        remaining_wildcard = '*'
      # Skip slash(es).
      remaining_wildcard = remaining_wildcard.lstrip(os.sep)
      filepaths = []
      for dirpath, unused_dirnames, filenames in os.walk(base_dir):
        filepaths.extend(
            os.path.join(dirpath, f)
            for f in fnmatch.filter(filenames, remaining_wildcard))
    else:
      # Not a recursive wildcarding request.
      filepaths = glob.glob(wildcard)
    for filepath in filepaths:
      expanded_uri = self.wildcard_uri.clone_replace_name(filepath)
      yield BucketListingRef(expanded_uri)

  def IterKeys(self):
    """Placeholder to allow polymorphic use of WildcardIterator.

    Raises:
      WildcardException: in all cases.
    """
    raise WildcardException(
        'Iterating over Keys not possible for file wildcards')

  def IterUris(self):
    """Convenience iterator that yields the StorageUri of each result.

    Yields:
      StorageUri, or empty iterator if no matches.
    """
    for bucket_listing_ref in self:
      yield bucket_listing_ref.GetUri()
| 429 |  | 
| 430 |  | 
class WildcardException(StandardError):
  """Error raised for wildcard URIs that are invalid or unsupported."""

  def __init__(self, reason):
    """Records the human-readable reason on the instance.

    Args:
      reason: Text describing what was wrong with the wildcard.
    """
    # The base class is initialized without arguments; the reason is carried
    # only by the 'reason' attribute.
    StandardError.__init__(self)
    self.reason = reason

  def __str__(self):
    """Returns the reason prefixed with the exception's name."""
    return 'WildcardException: %s' % self.reason

  # repr() and str() deliberately produce the same text.
  __repr__ = __str__
| 443 |  | 
| 444 |  | 
def wildcard_iterator(uri_or_str, proj_id_handler,
                      bucket_storage_uri_class=BucketStorageUri,
                      all_versions=False,
                      headers=None, debug=0):
  """Builds the WildcardIterator suited to the given StorageUri.

  Args:
    uri_or_str: StorageUri or URI string naming wildcard objects to iterate.
    proj_id_handler: ProjectIdHandler to use for current command.
    bucket_storage_uri_class: BucketStorageUri interface.
        Settable for testing/mocking.
    all_versions: Forwarded to CloudWildcardIterator; not used for file URIs.
    headers: Dictionary containing optional HTTP headers to pass to boto.
    debug: Debug level to pass in to boto connection (range 0..3).

  Returns:
    A WildcardIterator that handles the requested iteration.

  Raises:
    WildcardException: if the URI is neither a cloud URI nor a file URI.
  """
  uri = uri_or_str
  if isinstance(uri, basestring):
    # Parse with validate=False so that bucket names containing wildcard
    # chars are accepted.
    uri = boto.storage_uri(
        uri, debug=debug, validate=False,
        bucket_storage_uri_class=bucket_storage_uri_class,
        suppress_consec_slashes=False)

  if uri.is_cloud_uri():
    cloud_options = dict(
        bucket_storage_uri_class=bucket_storage_uri_class,
        all_versions=all_versions,
        headers=headers,
        debug=debug)
    return CloudWildcardIterator(uri, proj_id_handler, **cloud_options)

  if uri.is_file_uri():
    return FileWildcardIterator(uri, headers=headers, debug=debug)

  raise WildcardException('Unexpected type of StorageUri (%s)' % uri)
| 484 |  | 
| 485 |  | 
def ContainsWildcard(uri_or_str):
  """Reports whether the given URI has any wildcard characters in it.

  Args:
    uri_or_str: StorageUri or URI string to check. For a StorageUri, its
        'uri' attribute is the text that gets examined.

  Returns:
    True if a wildcard character is present, else False.
  """
  if isinstance(uri_or_str, basestring):
    text = uri_or_str
  else:
    text = uri_or_str.uri
  return WILDCARD_REGEX.search(text) is not None
| OLD | NEW | 
|---|