OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 # |
| 3 # Copyright 2010 Google Inc. |
| 4 # |
| 5 # Permission is hereby granted, free of charge, to any person obtaining a |
| 6 # copy of this software and associated documentation files (the |
| 7 # "Software"), to deal in the Software without restriction, including |
| 8 # without limitation the rights to use, copy, modify, merge, publish, dis- |
| 9 # tribute, sublicense, and/or sell copies of the Software, and to permit |
| 10 # persons to whom the Software is furnished to do so, subject to the fol- |
| 11 # lowing conditions: |
| 12 # |
| 13 # The above copyright notice and this permission notice shall be included |
| 14 # in all copies or substantial portions of the Software. |
| 15 # |
| 16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| 17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- |
| 18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT |
| 19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| 20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| 22 # IN THE SOFTWARE. |
| 23 |
| 24 """Unit tests for gslib wildcard_iterator""" |
| 25 |
| 26 import os |
| 27 import shutil |
| 28 import sys |
| 29 import tempfile |
| 30 import time |
| 31 import unittest |
| 32 |
| 33 # Put local libs at front of path so tests will run latest lib code rather |
| 34 # than whatever code is found on user's PYTHONPATH. |
| 35 sys.path.insert(0, '.') |
| 36 sys.path.insert(0, 'boto') |
| 37 from boto import InvalidUriError |
| 38 from gslib import test_util |
| 39 from gslib import wildcard_iterator |
| 40 from gslib.project_id import ProjectIdHandler |
| 41 from tests.integration.s3 import mock_storage_service |
| 42 from wildcard_iterator import ContainsWildcard |
| 43 |
| 44 |
class CloudWildcardIteratorTests(unittest.TestCase):
  """CloudWildcardIterator test suite.

  The shared fixture (two mock buckets) is built by SetUpClass() and removed
  by TearDownClass(). Both must be called explicitly by whoever runs the
  suite; they are deliberately not named setUpClass/tearDownClass.
  """

  def GetSuiteDescription(self):
    """Returns a human-readable name for this suite."""
    return 'CloudWildcardIterator test suite'

  @classmethod
  def SetUpClass(cls):
    """Creates 2 mock buckets, each holding every name in all_obj_names.

    Each bucket gets 3 immediate-child objects plus 3 objects nested under
    'nested1/'.
    """
    cls.immed_child_obj_names = ['abcd', 'abdd', 'ade$']
    cls.all_obj_names = ['abcd', 'abdd', 'ade$', 'nested1/nested2/xyz1',
                         'nested1/nested2/xyz2', 'nested1/nfile_abc']
    # Timestamp suffix keeps bucket names distinct between runs.
    cls.base_uri_str = 'gs://gslib_test_%d' % int(time.time())
    cls.test_bucket0_uri, cls.test_bucket0_obj_uri_strs = (
        cls.__SetUpOneMockBucket(0)
    )
    cls.test_bucket1_uri, cls.test_bucket1_obj_uri_strs = (
        cls.__SetUpOneMockBucket(1)
    )
    # Marker checked by TearDownClass so that cleanup is skipped when setup
    # did not run to completion.
    cls.created_test_data = True

  @classmethod
  def __SetUpOneMockBucket(cls, bucket_num):
    """Creates a mock bucket holding one empty object per all_obj_names entry.

    Args:
      bucket_num: Number for building bucket name.

    Returns:
      tuple: (bucket URI, set of object URI strings).
    """
    bucket_uri = test_util.test_storage_uri(
        '%s_%s' % (cls.base_uri_str, bucket_num))
    bucket_uri.create_bucket()
    obj_uri_strs = set()
    for obj_name in cls.all_obj_names:
      # The object URI is formed by appending the object name directly to
      # the bucket URI's string form.
      obj_uri = test_util.test_storage_uri('%s%s' % (bucket_uri, obj_name))
      key = obj_uri.new_key()
      key.set_contents_from_string('')
      obj_uri_strs.add(str(obj_uri))
    return (bucket_uri, obj_uri_strs)

  @classmethod
  def TearDownClass(cls):
    """Cleans up buckets and objects created by SetUpClass."""
    if not hasattr(cls, 'created_test_data'):
      return
    # Delete every object first, then the buckets that held them.
    for obj_uri_strs in (cls.test_bucket0_obj_uri_strs,
                         cls.test_bucket1_obj_uri_strs):
      for test_obj_uri_str in obj_uri_strs:
        test_util.test_storage_uri(test_obj_uri_str).delete_key()
    for bucket_uri in (cls.test_bucket0_uri, cls.test_bucket1_uri):
      bucket_uri.delete_bucket()

  def _IterUriStrs(self, uri_or_str):
    """Returns the set of str(uri) for every URI the wildcard expands to."""
    return set(
        str(u) for u in
        test_util.test_wildcard_iterator(uri_or_str).IterUris())

  def _CollectPrefixesAndUris(self, wildcard_uri):
    """Iterates a wildcard and splits the results by kind.

    Args:
      wildcard_uri: URI (possibly wildcarded) handed to the iterator.

    Returns:
      tuple: (set of prefix names, set of object URI strings).
    """
    prefixes = set()
    uri_strs = set()
    for blr in test_util.test_wildcard_iterator(wildcard_uri):
      if blr.HasPrefix():
        prefixes.add(blr.GetPrefix().name)
      else:
        uri_strs.add(blr.GetUri().uri)
    return (prefixes, uri_strs)

  def TestNoOpObjectIterator(self):
    """Tests that bucket-only URI iterates just that one URI"""
    results = list(
        test_util.test_wildcard_iterator(self.test_bucket0_uri).IterUris())
    self.assertEqual(1, len(results))
    self.assertEqual(str(self.test_bucket0_uri), str(results[0]))

  def TestMatchingAllObjects(self):
    """Tests matching all objects, based on wildcard"""
    actual_obj_uri_strs = self._IterUriStrs(
        self.test_bucket0_uri.clone_replace_name('**'))
    self.assertEqual(self.test_bucket0_obj_uri_strs, actual_obj_uri_strs)

  def TestMatchingObjectSubset(self):
    """Tests matching a subset of objects, based on wildcard"""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd')),
         str(self.test_bucket0_uri.clone_replace_name('abdd'))])
    actual_obj_uri_strs = self._IterUriStrs(
        self.test_bucket0_uri.clone_replace_name('ab??'))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def TestMatchingNonWildcardedUri(self):
    """Tests matching a single named object"""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd'))])
    actual_obj_uri_strs = self._IterUriStrs(
        self.test_bucket0_uri.clone_replace_name('abcd'))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def TestWildcardedObjectUriWithVsWithoutPrefix(self):
    """Tests that wildcarding w/ and w/o server prefix get same result"""
    # (It's just more efficient to query w/o a prefix; wildcard
    # iterator will filter the matches either way.)
    with_prefix_uri_strs = self._IterUriStrs(
        self.test_bucket0_uri.clone_replace_name('abcd'))
    # By including a wildcard at the start of the string no prefix can be
    # used in server request.
    no_prefix_uri_strs = self._IterUriStrs(
        self.test_bucket0_uri.clone_replace_name('?bcd'))
    self.assertEqual(with_prefix_uri_strs, no_prefix_uri_strs)

  def TestWildcardedObjectUriNestedSubdirMatch(self):
    """Tests wildcarding with a nested subdir"""
    prefixes, uri_strs = self._CollectPrefixesAndUris(
        self.test_bucket0_uri.clone_replace_name('*'))
    exp_obj_uri_strs = set(['%s_0/%s' % (self.base_uri_str, x)
                            for x in self.immed_child_obj_names])
    self.assertEqual(exp_obj_uri_strs, uri_strs)
    self.assertEqual(1, len(prefixes))
    self.assertTrue('nested1/' in prefixes)

  def TestWildcardedObjectUriNestedSubSubdirMatch(self):
    """Tests wildcarding with a nested sub-subdir"""
    # Both 'nested1/*' and 'nested1/*/' are checked, each against a fresh
    # pair of result sets.
    for final_char in ('', '/'):
      prefixes, uri_strs = self._CollectPrefixesAndUris(
          self.test_bucket0_uri.clone_replace_name('nested1/*%s' % final_char))
      self.assertEqual(1, len(uri_strs))
      self.assertEqual(1, len(prefixes))
      self.assertTrue('nested1/nested2/' in prefixes)

  def TestWildcardPlusSubdirMatch(self):
    """Tests gs://bucket/*/subdir matching"""
    actual_prefixes, actual_uri_strs = self._CollectPrefixesAndUris(
        self.test_bucket0_uri.clone_replace_name('*/nested1'))
    expected_uri_strs = set()
    expected_prefixes = set(['nested1/'])
    self.assertEqual(expected_prefixes, actual_prefixes)
    self.assertEqual(expected_uri_strs, actual_uri_strs)

  def TestWildcardPlusSubdirSubdirMatch(self):
    """Tests gs://bucket/*/subdir/* matching"""
    actual_prefixes, actual_uri_strs = self._CollectPrefixesAndUris(
        self.test_bucket0_uri.clone_replace_name('*/nested2/*'))
    expected_uri_strs = set([
        self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz1').uri,
        self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz2').uri])
    expected_prefixes = set()
    self.assertEqual(expected_prefixes, actual_prefixes)
    self.assertEqual(expected_uri_strs, actual_uri_strs)

  def TestNoMatchingWildcardedObjectUri(self):
    """Tests that get back an empty iterator for non-matching wildcarded URI"""
    res = list(test_util.test_wildcard_iterator(
        self.test_bucket0_uri.clone_replace_name('*x0')).IterUris())
    self.assertEqual(0, len(res))

  def TestWildcardedInvalidObjectUri(self):
    """Tests that we raise an exception for wildcarded invalid URI"""
    try:
      list(test_util.test_wildcard_iterator('badscheme://asdf').IterUris())
    except InvalidUriError:
      # sys.exc_info() instead of "except X, e" / "except X as e" so the
      # same source parses on every Python version from 2.5 up.
      e = sys.exc_info()[1]
      self.assertTrue(e.message.find('Unrecognized scheme') != -1)
    else:
      # Reached whether the iterator yielded results or was simply empty;
      # either way the expected exception never happened.
      self.fail('Expected InvalidUriError not raised.')

  def TestSingleMatchWildcardedBucketUri(self):
    """Tests matching a single bucket based on a wildcarded bucket URI"""
    exp_obj_uri_strs = set(['%s_1/' % self.base_uri_str])
    actual_obj_uri_strs = self._IterUriStrs('%s*1' % self.base_uri_str)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def TestMultiMatchWildcardedBucketUri(self):
    """Tests matching a multiple buckets based on a wildcarded bucket URI"""
    exp_obj_uri_strs = set(['%s_%s/' %
                            (self.base_uri_str, i) for i in range(2)])
    actual_obj_uri_strs = self._IterUriStrs('%s*' % self.base_uri_str)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def TestWildcardBucketAndObjectUri(self):
    """Tests matching with both bucket and object wildcards"""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd'))])
    actual_obj_uri_strs = self._IterUriStrs(
        '%s_0*/abc*' % self.base_uri_str)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def TestWildcardUpToFinalCharSubdirPlusObjectName(self):
    """Tests wildcard subd*r/obj name"""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz1'))])
    actual_obj_uri_strs = self._IterUriStrs(
        '%snested1/nest*2/xyz1' % self.test_bucket0_uri.uri)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def TestPostRecursiveWildcard(self):
    """Tests that a wildcard with ** followed by another wildcard works"""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz2'))])
    actual_obj_uri_strs = self._IterUriStrs(
        '%s**/*y*2' % self.test_bucket0_uri.uri)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def TestCallingGetKeyOnProviderOnlyWildcardIteration(self):
    """Tests that attempting iterating provider-only wildcard raises"""
    # Imported before the try block: the except clause below names this
    # class, so an import failure inside the try would be masked by a
    # NameError.
    from gslib.bucket_listing_ref import BucketListingRefException
    try:
      for iter_result in wildcard_iterator.wildcard_iterator(
          'gs://', ProjectIdHandler(),
          bucket_storage_uri_class=mock_storage_service.MockBucketStorageUri):
        iter_result.GetKey()
        self.fail('Expected BucketListingRefException not raised.')
    except BucketListingRefException:
      e = sys.exc_info()[1]
      self.assertTrue(str(e).find(
          'Attempt to call GetKey() on Key-less BucketListingRef') != -1)
| 280 |
class FileIteratorTests(unittest.TestCase):
  """FileWildcardIterator test suite.

  The shared fixture (a temporary directory tree) is built by SetUpClass()
  and removed by TearDownClass(). Both must be called explicitly by whoever
  runs the suite; they are deliberately not named setUpClass/tearDownClass.
  """

  def GetSuiteDescription(self):
    """Returns a human-readable name for this suite."""
    return 'FileWildcardIterator test suite'

  @classmethod
  def SetUpClass(cls):
    """Creates a test dir containing 3 files and one nested subdir + file.

    Resulting layout under cls.test_dir:
      abcd, abdd, ade$      (empty files)
      dir1/dir2/zzz         (empty file)
    """
    # Create the test directories.
    cls.test_dir = tempfile.mkdtemp()
    nested_subdir = '%s%sdir1%sdir2' % (cls.test_dir, os.sep, os.sep)
    os.makedirs(nested_subdir)

    # Create the test files.
    immed_child_filenames = ['abcd', 'abdd', 'ade$', 'dir1']
    immed_child_filepaths = ['%s%s%s' % (cls.test_dir, os.sep, f)
                             for f in immed_child_filenames]
    filenames = ['abcd', 'abdd', 'ade$', 'dir1%sdir2%szzz' % (os.sep, os.sep)]
    filepaths = ['%s%s%s' % (cls.test_dir, os.sep, f) for f in filenames]
    for filepath in filepaths:
      # Close immediately so no handle to the (empty) file stays open;
      # an explicit close() is used instead of a with-statement to stay
      # compatible with Python 2.5.
      open(filepath, 'w').close()

    # Set up global test variables.
    # Everything directly inside test_dir, i.e. the 3 files plus 'dir1'.
    cls.immed_child_uri_strs = set(
        'file://%s' % f for f in immed_child_filepaths)

    # Every regular file in the tree, at any depth.
    cls.all_file_uri_strs = set(['file://%s' % o for o in filepaths])

    # All files plus the innermost nested directory.
    cls.all_uri_strs = set(
        ['file://%s' % nested_subdir]
    ).union(cls.all_file_uri_strs)

  @classmethod
  def TearDownClass(cls):
    """Cleans up test dir and files created by SetUpClass."""
    # test_dir is absent when SetUpClass failed before creating it.
    if hasattr(cls, 'test_dir'):
      shutil.rmtree(cls.test_dir)

  def _IterUriStrs(self, uri_str):
    """Returns the set of str(uri) that the given URI string expands to."""
    uri = test_util.test_storage_uri(uri_str)
    return set(
        str(u) for u in test_util.test_wildcard_iterator(uri).IterUris())

  def TestContainsWildcard(self):
    """Tests ContainsWildcard call"""
    self.assertTrue(ContainsWildcard('a*.txt'))
    self.assertTrue(ContainsWildcard('a[0-9].txt'))
    self.assertFalse(ContainsWildcard('0-9.txt'))
    self.assertTrue(ContainsWildcard('?.txt'))

  def TestNoOpDirectoryIterator(self):
    """Tests that directory-only URI iterates just that one URI"""
    results = list(test_util.test_wildcard_iterator('file:///tmp/').IterUris())
    self.assertEqual(1, len(results))
    self.assertEqual('file:///tmp/', str(results[0]))

  def TestMatchingAllFiles(self):
    """Tests matching all files, based on wildcard"""
    actual_uri_strs = self._IterUriStrs('file://%s/*' % self.test_dir)
    self.assertEqual(self.immed_child_uri_strs, actual_uri_strs)

  def TestMatchingFileSubset(self):
    """Tests matching a subset of files, based on wildcard"""
    exp_uri_strs = set(
        ['file://%s/abcd' % self.test_dir, 'file://%s/abdd' % self.test_dir]
    )
    actual_uri_strs = self._IterUriStrs('file://%s/ab??' % self.test_dir)
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def TestMatchingNonWildcardedUri(self):
    """Tests matching a single named file"""
    exp_uri_strs = set(['file://%s/abcd' % self.test_dir])
    actual_uri_strs = self._IterUriStrs('file://%s/abcd' % self.test_dir)
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def TestMatchingFilesIgnoringOtherRegexChars(self):
    """Tests ignoring non-wildcard regex chars (e.g., ^ and $)"""
    exp_uri_strs = set(['file://%s/ade$' % self.test_dir])
    actual_uri_strs = self._IterUriStrs('file://%s/ad*$' % self.test_dir)
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def TestRecursiveDirectoryOnlyWildcarding(self):
    """Tests recursive expansion of directory-only '**' wildcard"""
    actual_uri_strs = self._IterUriStrs('file://%s/**' % self.test_dir)
    self.assertEqual(self.all_file_uri_strs, actual_uri_strs)

  def TestRecursiveDirectoryPlusFileWildcarding(self):
    """Tests recursive expansion of '**' directory plus '*' wildcard"""
    actual_uri_strs = self._IterUriStrs('file://%s/**/*' % self.test_dir)
    self.assertEqual(self.all_file_uri_strs, actual_uri_strs)

  def TestInvalidRecursiveDirectoryWildcard(self):
    """Tests that wildcard containing '***' raises exception"""
    try:
      uri = test_util.test_storage_uri('file://%s/***/abcd' % self.test_dir)
      list(test_util.test_wildcard_iterator(uri).IterUris())
    except wildcard_iterator.WildcardException:
      # sys.exc_info() instead of "except X, e" / "except X as e" so the
      # same source parses on every Python version from 2.5 up.
      e = sys.exc_info()[1]
      self.assertTrue(str(e).find('more than 2 consecutive') != -1)
    else:
      # Reached whether the iterator yielded results or was simply empty;
      # either way the expected exception never happened.
      self.fail('Expected WildcardException not raised.')

  def TestMissingDir(self):
    """Tests that wildcard gets empty iterator when directory doesn't exist"""
    res = list(
        test_util.test_wildcard_iterator('file://no_such_dir/*').IterUris())
    self.assertEqual(0, len(res))

  def TestExistingDirNoFileMatch(self):
    """Tests that wildcard returns empty iterator when there's no match"""
    uri = test_util.test_storage_uri(
        'file://%s/non_existent*' % self.test_dir)
    res = list(test_util.test_wildcard_iterator(uri).IterUris())
    self.assertEqual(0, len(res))
| 412 |
if __name__ == '__main__':
  if sys.version_info[:3] < (2, 5, 1):
    sys.exit('These tests must be run on at least Python 2.5.1\n')
  test_loader = unittest.TestLoader()
  # Test methods in this file are named TestXxx rather than testXxx.
  test_loader.testMethodPrefix = 'Test'
  all_passed = True
  for suite in (test_loader.loadTestsFromTestCase(CloudWildcardIteratorTests),
                test_loader.loadTestsFromTestCase(FileIteratorTests)):
    # Any loaded test instance gives access to the suite-level helpers
    # (GetSuiteDescription is an instance method). Iterating the suite
    # avoids reaching into its private _tests attribute.
    loaded_tests = list(suite)
    if not loaded_tests:
      # Nothing was loaded for this class, so there is no fixture to build.
      continue
    fixture_owner = loaded_tests[0]
    description = fixture_owner.GetSuiteDescription()
    # We call SetUpClass() and TearDownClass() ourselves because we
    # don't assume the user has Python 2.7 (which supports classmethods
    # that do it, with camelCase versions of these names).
    # Single-argument print(...) behaves the same as a statement on
    # Python 2 and as a function call on Python 3.
    try:
      print('Setting up %s...' % description)
      fixture_owner.SetUpClass()
      print('Running %s...' % description)
      result = unittest.TextTestRunner(verbosity=2).run(suite)
      if not result.wasSuccessful():
        all_passed = False
    finally:
      # Runs even when setup or the test run raised, so partially created
      # fixtures are still removed.
      print('Cleaning up after %s...' % description)
      fixture_owner.TearDownClass()
      print('')
  # Report failures through the exit status so callers (shell scripts, CI)
  # can detect them; every suite is still run before exiting.
  if not all_passed:
    sys.exit(1)
OLD | NEW |