OLD | NEW |
(Empty) | |
| 1 # -*- coding: utf-8 -*- |
| 2 # Copyright 2010 Google Inc. All Rights Reserved. |
| 3 # |
| 4 # Permission is hereby granted, free of charge, to any person obtaining a |
| 5 # copy of this software and associated documentation files (the |
| 6 # "Software"), to deal in the Software without restriction, including |
| 7 # without limitation the rights to use, copy, modify, merge, publish, dis- |
| 8 # tribute, sublicense, and/or sell copies of the Software, and to permit |
| 9 # persons to whom the Software is furnished to do so, subject to the fol- |
| 10 # lowing conditions: |
| 11 # |
| 12 # The above copyright notice and this permission notice shall be included |
| 13 # in all copies or substantial portions of the Software. |
| 14 # |
| 15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| 16 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- |
| 17 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT |
| 18 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| 19 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| 21 # IN THE SOFTWARE. |
| 22 """Unit tests for gsutil wildcard_iterator.""" |
| 23 |
| 24 from __future__ import absolute_import |
| 25 |
| 26 import tempfile |
| 27 |
| 28 from gslib import wildcard_iterator |
| 29 from gslib.exception import InvalidUrlError |
| 30 from gslib.storage_url import ContainsWildcard |
| 31 import gslib.tests.testcase as testcase |
| 32 from gslib.tests.util import ObjectToURI as suri |
| 33 |
| 34 |
class CloudWildcardIteratorTests(testcase.GsUtilUnitTestCase):
  """Unit tests for CloudWildcardIterator."""

  def setUp(self):
    """Creates 2 mock buckets, each containing 6 objects, 3 of them nested.

    Both bucket names share self.prefix_bucket_name and differ only in a
    trailing '0' or '1', so bucket-level wildcards built from
    self.base_uri_str can be made to match one or both of them.
    """
    super(CloudWildcardIteratorTests, self).setUp()
    self.immed_child_obj_names = ['abcd', 'abdd', 'ade$']
    self.all_obj_names = ['abcd', 'abdd', 'ade$', 'nested1/nested2/xyz1',
                          'nested1/nested2/xyz2', 'nested1/nfile_abc']

    self.base_bucket_uri = self.CreateBucket()
    # Truncating to 61 chars leaves room for the '_' separator and the
    # one-digit suffix appended below (63 chars at most) -- presumably the
    # maximum bucket name length; confirm against the storage service limits.
    self.prefix_bucket_name = '%s_' % self.base_bucket_uri.bucket_name[:61]
    self.base_uri_str = suri(self.base_bucket_uri)
    self.base_uri_str = self.base_uri_str.replace(
        self.base_bucket_uri.bucket_name, self.prefix_bucket_name)

    self.test_bucket0_uri = self.CreateBucket(
        bucket_name='%s0' % self.prefix_bucket_name)
    self.test_bucket0_obj_uri_strs = self._PopulateBucket(
        self.test_bucket0_uri)

    self.test_bucket1_uri = self.CreateBucket(
        bucket_name='%s1' % self.prefix_bucket_name)
    self.test_bucket1_obj_uri_strs = self._PopulateBucket(
        self.test_bucket1_uri)

  def _PopulateBucket(self, bucket_uri):
    """Creates an empty object in bucket_uri for each of self.all_obj_names.

    Args:
      bucket_uri: URI of the bucket in which to create the objects.

    Returns:
      Set of URI strings, one per created object.
    """
    obj_uri_strs = set()
    for obj_name in self.all_obj_names:
      obj_uri = self.CreateObject(bucket_uri=bucket_uri,
                                  object_name=obj_name, contents='')
      obj_uri_strs.add(suri(obj_uri))
    return obj_uri_strs

  def _ExpandedUriStrs(self, uri_or_str):
    """Returns the set of str() of each IterAll result for uri_or_str."""
    return set(
        str(u) for u in self._test_wildcard_iterator(uri_or_str).IterAll(
            expand_top_level_buckets=True))

  def _SplitListing(self, object_name_pattern):
    """Iterates test bucket 0 with the given object name pattern.

    Args:
      object_name_pattern: Object name (possibly wildcarded) to substitute
          into the test bucket 0 URI.

    Returns:
      (prefixes, uri_strs) tuple: the set of root_object values of results
      that report IsPrefix(), and the set of url_string values of all other
      results.
    """
    prefixes = set()
    uri_strs = set()
    for blr in self._test_wildcard_iterator(
        self.test_bucket0_uri.clone_replace_name(object_name_pattern)):
      if blr.IsPrefix():
        prefixes.add(blr.root_object)
      else:
        uri_strs.add(blr.url_string)
    return prefixes, uri_strs

  def testNoOpObjectIterator(self):
    """Tests that bucket-only URI iterates just that one URI."""
    results = list(
        self._test_wildcard_iterator(self.test_bucket0_uri).IterBuckets(
            bucket_fields=['id']))
    self.assertEqual(1, len(results))
    self.assertEqual(str(self.test_bucket0_uri), str(results[0]))

  def testMatchingAllObjects(self):
    """Tests matching all objects, based on wildcard."""
    actual_obj_uri_strs = self._ExpandedUriStrs(
        self.test_bucket0_uri.clone_replace_name('**'))
    self.assertEqual(self.test_bucket0_obj_uri_strs, actual_obj_uri_strs)

  def testMatchingObjectSubset(self):
    """Tests matching a subset of objects, based on wildcard."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd')),
         str(self.test_bucket0_uri.clone_replace_name('abdd'))])
    actual_obj_uri_strs = self._ExpandedUriStrs(
        self.test_bucket0_uri.clone_replace_name('ab??'))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testMatchingNonWildcardedUri(self):
    """Tests matching a single named object."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd'))])
    actual_obj_uri_strs = self._ExpandedUriStrs(
        self.test_bucket0_uri.clone_replace_name('abcd'))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardedObjectUriWithVsWithoutPrefix(self):
    """Tests that wildcarding w/ and w/o server prefix get same result."""
    # (It's just more efficient to query w/o a prefix; wildcard
    # iterator will filter the matches either way.)
    with_prefix_uri_strs = self._ExpandedUriStrs(
        self.test_bucket0_uri.clone_replace_name('abcd'))
    # By including a wildcard at the start of the string no prefix can be
    # used in server request.
    no_prefix_uri_strs = self._ExpandedUriStrs(
        self.test_bucket0_uri.clone_replace_name('?bcd'))
    self.assertEqual(with_prefix_uri_strs, no_prefix_uri_strs)

  def testWildcardedObjectUriNestedSubdirMatch(self):
    """Tests wildcarding with a nested subdir."""
    prefixes, uri_strs = self._SplitListing('*')
    exp_obj_uri_strs = set([suri(self.test_bucket0_uri, x)
                            for x in self.immed_child_obj_names])
    self.assertEqual(exp_obj_uri_strs, uri_strs)
    self.assertEqual(1, len(prefixes))
    self.assertTrue('nested1/' in prefixes)

  def testWildcardPlusSubdirMatch(self):
    """Tests gs://bucket/*/subdir matching."""
    actual_prefixes, actual_uri_strs = self._SplitListing('*/nested1')
    expected_uri_strs = set()
    expected_prefixes = set(['nested1/'])
    self.assertEqual(expected_prefixes, actual_prefixes)
    self.assertEqual(expected_uri_strs, actual_uri_strs)

  def testWildcardPlusSubdirSubdirMatch(self):
    """Tests gs://bucket/*/subdir/* matching."""
    actual_prefixes, actual_uri_strs = self._SplitListing('*/nested2/*')
    expected_uri_strs = set([
        self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz1').uri,
        self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz2').uri])
    expected_prefixes = set()
    self.assertEqual(expected_prefixes, actual_prefixes)
    self.assertEqual(expected_uri_strs, actual_uri_strs)

  def testNoMatchingWildcardedObjectUri(self):
    """Tests that get back an empty iterator for non-matching wildcarded URI."""
    res = list(self._test_wildcard_iterator(
        self.test_bucket0_uri.clone_replace_name('*x0')).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(0, len(res))

  def testWildcardedInvalidObjectUri(self):
    """Tests that we raise an exception for wildcarded invalid URI."""
    try:
      # Drain the iterator inside the try block so the error is caught
      # whether it surfaces at construction time or during iteration.
      list(self._test_wildcard_iterator(
          'badscheme://asdf').IterAll(expand_top_level_buckets=True))
    except InvalidUrlError as e:
      # Expected behavior.
      self.assertTrue('Unrecognized scheme' in e.message)
    else:
      # Reaching here means nothing was raised at all; previously an empty
      # iterator would have let this test pass without checking anything.
      self.fail('Expected InvalidUrlError not raised.')

  def testSingleMatchWildcardedBucketUri(self):
    """Tests matching a single bucket based on a wildcarded bucket URI."""
    exp_obj_uri_strs = set([
        suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            '%s*1' % self.base_uri_str).IterBuckets(bucket_fields=['id']))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testMultiMatchWildcardedBucketUri(self):
    """Tests matching multiple buckets based on a wildcarded bucket URI."""
    exp_obj_uri_strs = set([
        suri(self.test_bucket0_uri) + self.test_bucket0_uri.delim,
        suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            '%s*' % self.base_uri_str).IterBuckets(bucket_fields=['id']))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardBucketAndObjectUri(self):
    """Tests matching with both bucket and object wildcards."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd'))])
    actual_obj_uri_strs = self._ExpandedUriStrs(
        '%s0*/abc*' % self.base_uri_str)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardUpToFinalCharSubdirPlusObjectName(self):
    """Tests wildcard subd*r/obj name."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz1'))])
    actual_obj_uri_strs = self._ExpandedUriStrs(
        '%snested1/nest*2/xyz1' % self.test_bucket0_uri.uri)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testPostRecursiveWildcard(self):
    """Tests wildcard containing ** followed by an additional wildcard."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz2'))])
    actual_obj_uri_strs = self._ExpandedUriStrs(
        '%s**/*y*2' % self.test_bucket0_uri.uri)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardFields(self):
    """Tests that wildcard w/fields specification returns correct fields."""
    # Requesting 'updated' must populate that field on every result.
    blrs = set(
        self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('**')).IterAll(
                bucket_listing_fields=['updated']))
    self.assertTrue(len(blrs))
    for blr in blrs:
      self.assertTrue(blr.root_object and blr.root_object.updated)
    # Requesting only 'generation' must leave 'updated' unpopulated.
    blrs = set(
        self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('**')).IterAll(
                bucket_listing_fields=['generation']))
    self.assertTrue(len(blrs))
    for blr in blrs:
      self.assertTrue(blr.root_object and not blr.root_object.updated)
| 250 |
| 251 |
class FileIteratorTests(testcase.GsUtilUnitTestCase):
  """Unit tests for FileWildcardIterator."""

  def setUp(self):
    """Creates a test dir with 3 files and one nested subdirectory + file."""
    super(FileIteratorTests, self).setUp()

    self.test_dir = self.CreateTempDir(test_files=[
        'abcd', 'abdd', 'ade$', ('dir1', 'dir2', 'zzz')])

    self.root_files_uri_strs = set([
        suri(self.test_dir, 'abcd'),
        suri(self.test_dir, 'abdd'),
        suri(self.test_dir, 'ade$')])

    self.subdirs_uri_strs = set([suri(self.test_dir, 'dir1')])

    self.nested_files_uri_strs = set([
        suri(self.test_dir, 'dir1', 'dir2', 'zzz')])

    # Immediate children include the top-level subdirectory; the "all files"
    # set instead includes the nested file but no directories.
    self.immed_child_uri_strs = self.root_files_uri_strs | self.subdirs_uri_strs
    self.all_file_uri_strs = (
        self.root_files_uri_strs | self.nested_files_uri_strs)

  def _MatchUnderTestDir(self, *path_parts):
    """Expands a (possibly wildcarded) path beneath self.test_dir.

    Args:
      *path_parts: Path components appended to self.test_dir to form the URI.

    Returns:
      Set of str() of each result of IterAll(expand_top_level_buckets=True).
    """
    uri = self._test_storage_uri(suri(self.test_dir, *path_parts))
    return set(
        str(u) for u in self._test_wildcard_iterator(uri).IterAll(
            expand_top_level_buckets=True))

  def testContainsWildcard(self):
    """Tests ContainsWildcard call."""
    self.assertTrue(ContainsWildcard('a*.txt'))
    self.assertTrue(ContainsWildcard('a[0-9].txt'))
    self.assertFalse(ContainsWildcard('0-9.txt'))
    self.assertTrue(ContainsWildcard('?.txt'))

  def testNoOpDirectoryIterator(self):
    """Tests that directory-only URI iterates just that one URI."""
    # Use gettempdir() rather than reading tempfile.tempdir directly: the
    # module global is None until gettempdir() has been called at least once.
    tmp_dir_uri_str = suri(tempfile.gettempdir())
    results = list(
        self._test_wildcard_iterator(tmp_dir_uri_str).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(1, len(results))
    self.assertEqual(tmp_dir_uri_str, str(results[0]))

  def testMatchingAllFiles(self):
    """Tests matching all files, based on wildcard."""
    actual_uri_strs = self._MatchUnderTestDir('*')
    self.assertEqual(self.immed_child_uri_strs, actual_uri_strs)

  def testMatchingFileSubset(self):
    """Tests matching a subset of files, based on wildcard."""
    exp_uri_strs = set(
        [suri(self.test_dir, 'abcd'), suri(self.test_dir, 'abdd')])
    actual_uri_strs = self._MatchUnderTestDir('ab??')
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testMatchingNonWildcardedUri(self):
    """Tests matching a single named file."""
    exp_uri_strs = set([suri(self.test_dir, 'abcd')])
    actual_uri_strs = self._MatchUnderTestDir('abcd')
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testMatchingFilesIgnoringOtherRegexChars(self):
    """Tests ignoring non-wildcard regex chars (e.g., ^ and $)."""
    exp_uri_strs = set([suri(self.test_dir, 'ade$')])
    actual_uri_strs = self._MatchUnderTestDir('ad*$')
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testRecursiveDirectoryOnlyWildcarding(self):
    """Tests recursive expansion of directory-only '**' wildcard."""
    actual_uri_strs = self._MatchUnderTestDir('**')
    self.assertEqual(self.all_file_uri_strs, actual_uri_strs)

  def testRecursiveDirectoryPlusFileWildcarding(self):
    """Tests recursive expansion of '**' directory plus '*' wildcard."""
    actual_uri_strs = self._MatchUnderTestDir('**', '*')
    self.assertEqual(self.all_file_uri_strs, actual_uri_strs)

  def testInvalidRecursiveDirectoryWildcard(self):
    """Tests that wildcard containing '***' raises exception."""
    try:
      # Drain the iterator inside the try block so the error is caught
      # whether it surfaces at construction time or during iteration.
      self._MatchUnderTestDir('***', 'abcd')
    except wildcard_iterator.WildcardException as e:
      # Expected behavior.
      self.assertTrue('more than 2 consecutive' in str(e))
    else:
      # Reaching here means nothing was raised at all; previously an empty
      # iterator would have let this test pass without checking anything.
      self.fail('Expected WildcardException not raised.')

  def testMissingDir(self):
    """Tests that wildcard gets empty iterator when directory doesn't exist."""
    res = list(
        self._test_wildcard_iterator(suri('no_such_dir', '*')).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(0, len(res))

  def testExistingDirNoFileMatch(self):
    """Tests that wildcard returns empty iterator when there's no match."""
    self.assertEqual(0, len(self._MatchUnderTestDir('non_existent*')))
OLD | NEW |