| OLD | NEW |
| 1 # -*- coding: utf-8 -*- |
| 1 # Copyright 2010 Google Inc. All Rights Reserved. | 2 # Copyright 2010 Google Inc. All Rights Reserved. |
| 2 # | 3 # |
| 3 # Permission is hereby granted, free of charge, to any person obtaining a | 4 # Permission is hereby granted, free of charge, to any person obtaining a |
| 4 # copy of this software and associated documentation files (the | 5 # copy of this software and associated documentation files (the |
| 5 # "Software"), to deal in the Software without restriction, including | 6 # "Software"), to deal in the Software without restriction, including |
| 6 # without limitation the rights to use, copy, modify, merge, publish, dis- | 7 # without limitation the rights to use, copy, modify, merge, publish, dis- |
| 7 # tribute, sublicense, and/or sell copies of the Software, and to permit | 8 # tribute, sublicense, and/or sell copies of the Software, and to permit |
| 8 # persons to whom the Software is furnished to do so, subject to the fol- | 9 # persons to whom the Software is furnished to do so, subject to the fol- |
| 9 # lowing conditions: | 10 # lowing conditions: |
| 10 # | 11 # |
| 11 # The above copyright notice and this permission notice shall be included | 12 # The above copyright notice and this permission notice shall be included |
| 12 # in all copies or substantial portions of the Software. | 13 # in all copies or substantial portions of the Software. |
| 13 # | 14 # |
| 14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | 15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| 15 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | 16 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- |
| 16 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | 17 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT |
| 17 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | 18 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| 18 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 19 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 19 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | 20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| 20 # IN THE SOFTWARE. | 21 # IN THE SOFTWARE. |
| 22 """Unit tests for gsutil wildcard_iterator.""" |
| 21 | 23 |
| 22 """Unit tests for gslib wildcard_iterator""" | 24 from __future__ import absolute_import |
| 23 | 25 |
| 24 import os.path | |
| 25 import tempfile | 26 import tempfile |
| 26 | 27 |
| 27 from boto import InvalidUriError | |
| 28 | |
| 29 from gslib import wildcard_iterator | 28 from gslib import wildcard_iterator |
| 30 from gslib.project_id import ProjectIdHandler | 29 from gslib.exception import InvalidUrlError |
| 30 from gslib.storage_url import ContainsWildcard |
| 31 import gslib.tests.testcase as testcase | 31 import gslib.tests.testcase as testcase |
| 32 from gslib.wildcard_iterator import ContainsWildcard | |
| 33 from gslib.tests.util import ObjectToURI as suri | 32 from gslib.tests.util import ObjectToURI as suri |
| 34 | 33 |
| 35 | 34 |
| 36 class CloudWildcardIteratorTests(testcase.GsUtilUnitTestCase): | 35 class CloudWildcardIteratorTests(testcase.GsUtilUnitTestCase): |
| 37 """CloudWildcardIterator test suite""" | 36 """Unit tests for CloudWildcardIterator.""" |
| 38 | 37 |
| 39 def setUp(self): | 38 def setUp(self): |
| 40 """Creates 2 mock buckets, each containing 4 objects, including 1 nested.""" | 39 """Creates 2 mock buckets, each containing 4 objects, including 1 nested.""" |
| 41 super(CloudWildcardIteratorTests, self).setUp() | 40 super(CloudWildcardIteratorTests, self).setUp() |
| 42 self.immed_child_obj_names = ['abcd', 'abdd', 'ade$'] | 41 self.immed_child_obj_names = ['abcd', 'abdd', 'ade$'] |
| 43 self.all_obj_names = ['abcd', 'abdd', 'ade$', 'nested1/nested2/xyz1', | 42 self.all_obj_names = ['abcd', 'abdd', 'ade$', 'nested1/nested2/xyz1', |
| 44 'nested1/nested2/xyz2', 'nested1/nfile_abc'] | 43 'nested1/nested2/xyz2', 'nested1/nfile_abc'] |
| 45 | 44 |
| 46 self.base_bucket_uri = self.CreateBucket() | 45 self.base_bucket_uri = self.CreateBucket() |
| 47 self.prefix_bucket_name = '%s_' % self.base_bucket_uri.bucket_name[:61] | 46 self.prefix_bucket_name = '%s_' % self.base_bucket_uri.bucket_name[:61] |
| 48 self.base_uri_str = suri(self.base_bucket_uri) | 47 self.base_uri_str = suri(self.base_bucket_uri) |
| 49 self.base_uri_str = self.base_uri_str.replace( | 48 self.base_uri_str = self.base_uri_str.replace( |
| 50 self.base_bucket_uri.bucket_name, self.prefix_bucket_name) | 49 self.base_bucket_uri.bucket_name, self.prefix_bucket_name) |
| 51 | 50 |
| 52 self.test_bucket0_uri = self.CreateBucket( | 51 self.test_bucket0_uri = self.CreateBucket( |
| 53 bucket_name='%s0' % self.prefix_bucket_name) | 52 bucket_name='%s0' % self.prefix_bucket_name) |
| 54 self.test_bucket0_obj_uri_strs = set() | 53 self.test_bucket0_obj_uri_strs = set() |
| 55 for obj_name in self.all_obj_names: | 54 for obj_name in self.all_obj_names: |
| 56 obj_uri = self.CreateObject(bucket_uri=self.test_bucket0_uri, | 55 obj_uri = self.CreateObject(bucket_uri=self.test_bucket0_uri, |
| 57 object_name=obj_name, contents='') | 56 object_name=obj_name, contents='') |
| 58 self.test_bucket0_obj_uri_strs.add(suri(obj_uri)) | 57 self.test_bucket0_obj_uri_strs.add(suri(obj_uri)) |
| 59 | 58 |
| 60 self.test_bucket1_uri = self.CreateBucket( | 59 self.test_bucket1_uri = self.CreateBucket( |
| 61 bucket_name='%s1' % self.prefix_bucket_name) | 60 bucket_name='%s1' % self.prefix_bucket_name) |
| 62 self.test_bucket1_obj_uri_strs = set() | 61 self.test_bucket1_obj_uri_strs = set() |
| 63 for obj_name in self.all_obj_names: | 62 for obj_name in self.all_obj_names: |
| 64 obj_uri = self.CreateObject(bucket_uri=self.test_bucket1_uri, | 63 obj_uri = self.CreateObject(bucket_uri=self.test_bucket1_uri, |
| 65 object_name=obj_name, contents='') | 64 object_name=obj_name, contents='') |
| 66 self.test_bucket1_obj_uri_strs.add(suri(obj_uri)) | 65 self.test_bucket1_obj_uri_strs.add(suri(obj_uri)) |
| 67 | 66 |
| 68 def testNoOpObjectIterator(self): | 67 def testNoOpObjectIterator(self): |
| 69 """Tests that bucket-only URI iterates just that one URI""" | 68 """Tests that bucket-only URI iterates just that one URI.""" |
| 70 results = list( | 69 results = list( |
| 71 self._test_wildcard_iterator(self.test_bucket0_uri).IterUris()) | 70 self._test_wildcard_iterator(self.test_bucket0_uri).IterBuckets( |
| 71 bucket_fields=['id'])) |
| 72 self.assertEqual(1, len(results)) | 72 self.assertEqual(1, len(results)) |
| 73 self.assertEqual(str(self.test_bucket0_uri), str(results[0])) | 73 self.assertEqual(str(self.test_bucket0_uri), str(results[0])) |
| 74 | 74 |
| 75 def testMatchingAllObjects(self): | 75 def testMatchingAllObjects(self): |
| 76 """Tests matching all objects, based on wildcard""" | 76 """Tests matching all objects, based on wildcard.""" |
| 77 actual_obj_uri_strs = set( | 77 actual_obj_uri_strs = set( |
| 78 str(u) for u in self._test_wildcard_iterator( | 78 str(u) for u in self._test_wildcard_iterator( |
| 79 self.test_bucket0_uri.clone_replace_name('**')).IterUris()) | 79 self.test_bucket0_uri.clone_replace_name('**')).IterAll( |
| 80 expand_top_level_buckets=True)) |
| 80 self.assertEqual(self.test_bucket0_obj_uri_strs, actual_obj_uri_strs) | 81 self.assertEqual(self.test_bucket0_obj_uri_strs, actual_obj_uri_strs) |
| 81 | 82 |
| 82 def testMatchingObjectSubset(self): | 83 def testMatchingObjectSubset(self): |
| 83 """Tests matching a subset of objects, based on wildcard""" | 84 """Tests matching a subset of objects, based on wildcard.""" |
| 84 exp_obj_uri_strs = set( | 85 exp_obj_uri_strs = set( |
| 85 [str(self.test_bucket0_uri.clone_replace_name('abcd')), | 86 [str(self.test_bucket0_uri.clone_replace_name('abcd')), |
| 86 str(self.test_bucket0_uri.clone_replace_name('abdd'))]) | 87 str(self.test_bucket0_uri.clone_replace_name('abdd'))]) |
| 87 actual_obj_uri_strs = set( | 88 actual_obj_uri_strs = set( |
| 88 str(u) for u in self._test_wildcard_iterator( | 89 str(u) for u in self._test_wildcard_iterator( |
| 89 self.test_bucket0_uri.clone_replace_name('ab??')).IterUris()) | 90 self.test_bucket0_uri.clone_replace_name('ab??')).IterAll( |
| 91 expand_top_level_buckets=True)) |
| 90 self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs) | 92 self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs) |
| 91 | 93 |
| 92 def testMatchingNonWildcardedUri(self): | 94 def testMatchingNonWildcardedUri(self): |
| 93 """Tests matching a single named object""" | 95 """Tests matching a single named object.""" |
| 94 exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name('abcd') | 96 exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name('abcd') |
| 95 )]) | 97 )]) |
| 96 actual_obj_uri_strs = set( | 98 actual_obj_uri_strs = set( |
| 97 str(u) for u in self._test_wildcard_iterator( | 99 str(u) for u in self._test_wildcard_iterator( |
| 98 self.test_bucket0_uri.clone_replace_name('abcd')).IterUris()) | 100 self.test_bucket0_uri.clone_replace_name('abcd')).IterAll( |
| 101 expand_top_level_buckets=True)) |
| 99 self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs) | 102 self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs) |
| 100 | 103 |
| 101 def testWildcardedObjectUriWithVsWithoutPrefix(self): | 104 def testWildcardedObjectUriWithVsWithoutPrefix(self): |
| 102 """Tests that wildcarding w/ and w/o server prefix get same result""" | 105 """Tests that wildcarding w/ and w/o server prefix get same result.""" |
| 103 # (It's just more efficient to query w/o a prefix; wildcard | 106 # (It's just more efficient to query w/o a prefix; wildcard |
| 104 # iterator will filter the matches either way.) | 107 # iterator will filter the matches either way.) |
| 105 with_prefix_uri_strs = set( | 108 with_prefix_uri_strs = set( |
| 106 str(u) for u in self._test_wildcard_iterator( | 109 str(u) for u in self._test_wildcard_iterator( |
| 107 self.test_bucket0_uri.clone_replace_name('abcd')).IterUris()) | 110 self.test_bucket0_uri.clone_replace_name('abcd')).IterAll( |
| 111 expand_top_level_buckets=True)) |
| 108 # By including a wildcard at the start of the string no prefix can be | 112 # By including a wildcard at the start of the string no prefix can be |
| 109 # used in server request. | 113 # used in server request. |
| 110 no_prefix_uri_strs = set( | 114 no_prefix_uri_strs = set( |
| 111 str(u) for u in self._test_wildcard_iterator( | 115 str(u) for u in self._test_wildcard_iterator( |
| 112 self.test_bucket0_uri.clone_replace_name('?bcd')).IterUris()) | 116 self.test_bucket0_uri.clone_replace_name('?bcd')).IterAll( |
| 117 expand_top_level_buckets=True)) |
| 113 self.assertEqual(with_prefix_uri_strs, no_prefix_uri_strs) | 118 self.assertEqual(with_prefix_uri_strs, no_prefix_uri_strs) |
| 114 | 119 |
| 115 def testWildcardedObjectUriNestedSubdirMatch(self): | 120 def testWildcardedObjectUriNestedSubdirMatch(self): |
| 116 """Tests wildcarding with a nested subdir""" | 121 """Tests wildcarding with a nested subdir.""" |
| 117 uri_strs = set() | 122 uri_strs = set() |
| 118 prefixes = set() | 123 prefixes = set() |
| 119 for blr in self._test_wildcard_iterator( | 124 for blr in self._test_wildcard_iterator( |
| 120 self.test_bucket0_uri.clone_replace_name('*')): | 125 self.test_bucket0_uri.clone_replace_name('*')): |
| 121 if blr.HasPrefix(): | 126 if blr.IsPrefix(): |
| 122 prefixes.add(blr.GetPrefix().name) | 127 prefixes.add(blr.root_object) |
| 123 else: | 128 else: |
| 124 uri_strs.add(blr.GetUri().uri) | 129 uri_strs.add(blr.url_string) |
| 125 exp_obj_uri_strs = set([suri(self.test_bucket0_uri, x) | 130 exp_obj_uri_strs = set([suri(self.test_bucket0_uri, x) |
| 126 for x in self.immed_child_obj_names]) | 131 for x in self.immed_child_obj_names]) |
| 127 self.assertEqual(exp_obj_uri_strs, uri_strs) | 132 self.assertEqual(exp_obj_uri_strs, uri_strs) |
| 128 self.assertEqual(1, len(prefixes)) | 133 self.assertEqual(1, len(prefixes)) |
| 129 self.assertTrue('nested1/' in prefixes) | 134 self.assertTrue('nested1/' in prefixes) |
| 130 | 135 |
| 131 def testWildcardedObjectUriNestedSubSubdirMatch(self): | |
| 132 """Tests wildcarding with a nested sub-subdir""" | |
| 133 for final_char in ('', '/'): | |
| 134 uri_strs = set() | |
| 135 prefixes = set() | |
| 136 for blr in self._test_wildcard_iterator( | |
| 137 self.test_bucket0_uri.clone_replace_name('nested1/*%s' % final_char)): | |
| 138 if blr.HasPrefix(): | |
| 139 prefixes.add(blr.GetPrefix().name) | |
| 140 else: | |
| 141 uri_strs.add(blr.GetUri().uri) | |
| 142 self.assertEqual(1, len(uri_strs)) | |
| 143 self.assertEqual(1, len(prefixes)) | |
| 144 self.assertTrue('nested1/nested2/' in prefixes) | |
| 145 | |
| 146 def testWildcardPlusSubdirMatch(self): | 136 def testWildcardPlusSubdirMatch(self): |
| 147 """Tests gs://bucket/*/subdir matching""" | 137 """Tests gs://bucket/*/subdir matching.""" |
| 148 actual_uri_strs = set() | 138 actual_uri_strs = set() |
| 149 actual_prefixes = set() | 139 actual_prefixes = set() |
| 150 for blr in self._test_wildcard_iterator( | 140 for blr in self._test_wildcard_iterator( |
| 151 self.test_bucket0_uri.clone_replace_name('*/nested1')): | 141 self.test_bucket0_uri.clone_replace_name('*/nested1')): |
| 152 if blr.HasPrefix(): | 142 if blr.IsPrefix(): |
| 153 actual_prefixes.add(blr.GetPrefix().name) | 143 actual_prefixes.add(blr.root_object) |
| 154 else: | 144 else: |
| 155 actual_uri_strs.add(blr.GetUri().uri) | 145 actual_uri_strs.add(blr.url_string) |
| 156 expected_uri_strs = set() | 146 expected_uri_strs = set() |
| 157 expected_prefixes = set(['nested1/']) | 147 expected_prefixes = set(['nested1/']) |
| 158 self.assertEqual(expected_prefixes, actual_prefixes) | 148 self.assertEqual(expected_prefixes, actual_prefixes) |
| 159 self.assertEqual(expected_uri_strs, actual_uri_strs) | 149 self.assertEqual(expected_uri_strs, actual_uri_strs) |
| 160 | 150 |
| 161 def testWildcardPlusSubdirSubdirMatch(self): | 151 def testWildcardPlusSubdirSubdirMatch(self): |
| 162 """Tests gs://bucket/*/subdir/* matching""" | 152 """Tests gs://bucket/*/subdir/* matching.""" |
| 163 actual_uri_strs = set() | 153 actual_uri_strs = set() |
| 164 actual_prefixes = set() | 154 actual_prefixes = set() |
| 165 for blr in self._test_wildcard_iterator( | 155 for blr in self._test_wildcard_iterator( |
| 166 self.test_bucket0_uri.clone_replace_name('*/nested2/*')): | 156 self.test_bucket0_uri.clone_replace_name('*/nested2/*')): |
| 167 if blr.HasPrefix(): | 157 if blr.IsPrefix(): |
| 168 actual_prefixes.add(blr.GetPrefix().name) | 158 actual_prefixes.add(blr.root_object) |
| 169 else: | 159 else: |
| 170 actual_uri_strs.add(blr.GetUri().uri) | 160 actual_uri_strs.add(blr.url_string) |
| 171 expected_uri_strs = set([ | 161 expected_uri_strs = set([ |
| 172 self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz1').uri, | 162 self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz1').uri, |
| 173 self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz2').uri]) | 163 self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz2').uri]) |
| 174 expected_prefixes = set() | 164 expected_prefixes = set() |
| 175 self.assertEqual(expected_prefixes, actual_prefixes) | 165 self.assertEqual(expected_prefixes, actual_prefixes) |
| 176 self.assertEqual(expected_uri_strs, actual_uri_strs) | 166 self.assertEqual(expected_uri_strs, actual_uri_strs) |
| 177 | 167 |
| 178 def testNoMatchingWildcardedObjectUri(self): | 168 def testNoMatchingWildcardedObjectUri(self): |
| 179 """Tests that get back an empty iterator for non-matching wildcarded URI""" | 169 """Tests that get back an empty iterator for non-matching wildcarded URI.""" |
| 180 res = list(self._test_wildcard_iterator( | 170 res = list(self._test_wildcard_iterator( |
| 181 self.test_bucket0_uri.clone_replace_name('*x0')).IterUris()) | 171 self.test_bucket0_uri.clone_replace_name('*x0')).IterAll( |
| 172 expand_top_level_buckets=True)) |
| 182 self.assertEqual(0, len(res)) | 173 self.assertEqual(0, len(res)) |
| 183 | 174 |
| 184 def testWildcardedInvalidObjectUri(self): | 175 def testWildcardedInvalidObjectUri(self): |
| 185 """Tests that we raise an exception for wildcarded invalid URI""" | 176 """Tests that we raise an exception for wildcarded invalid URI.""" |
| 186 try: | 177 try: |
| 187 for unused_ in self._test_wildcard_iterator( | 178 for unused_ in self._test_wildcard_iterator( |
| 188 'badscheme://asdf').IterUris(): | 179 'badscheme://asdf').IterAll(expand_top_level_buckets=True): |
| 189 self.assertFalse('Expected InvalidUriError not raised.') | 180 self.assertFalse('Expected InvalidUrlError not raised.') |
| 190 except InvalidUriError, e: | 181 except InvalidUrlError, e: |
| 191 # Expected behavior. | 182 # Expected behavior. |
| 192 self.assertTrue(e.message.find('Unrecognized scheme') != -1) | 183 self.assertTrue(e.message.find('Unrecognized scheme') != -1) |
| 193 | 184 |
| 194 def testSingleMatchWildcardedBucketUri(self): | 185 def testSingleMatchWildcardedBucketUri(self): |
| 195 """Tests matching a single bucket based on a wildcarded bucket URI""" | 186 """Tests matching a single bucket based on a wildcarded bucket URI.""" |
| 196 exp_obj_uri_strs = set([ | 187 exp_obj_uri_strs = set([ |
| 197 suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim]) | 188 suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim]) |
| 198 actual_obj_uri_strs = set( | 189 actual_obj_uri_strs = set( |
| 199 str(u) for u in self._test_wildcard_iterator( | 190 str(u) for u in self._test_wildcard_iterator( |
| 200 '%s*1' % self.base_uri_str).IterUris()) | 191 '%s*1' % self.base_uri_str).IterBuckets(bucket_fields=['id'])) |
| 201 self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs) | 192 self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs) |
| 202 | 193 |
| 203 def testMultiMatchWildcardedBucketUri(self): | 194 def testMultiMatchWildcardedBucketUri(self): |
| 204 """Tests matching a multiple buckets based on a wildcarded bucket URI""" | 195 """Tests matching a multiple buckets based on a wildcarded bucket URI.""" |
| 205 exp_obj_uri_strs = set([ | 196 exp_obj_uri_strs = set([ |
| 206 suri(self.test_bucket0_uri) + self.test_bucket0_uri.delim, | 197 suri(self.test_bucket0_uri) + self.test_bucket0_uri.delim, |
| 207 suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim]) | 198 suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim]) |
| 208 actual_obj_uri_strs = set( | 199 actual_obj_uri_strs = set( |
| 209 str(u) for u in self._test_wildcard_iterator( | 200 str(u) for u in self._test_wildcard_iterator( |
| 210 '%s*' % self.base_uri_str).IterUris()) | 201 '%s*' % self.base_uri_str).IterBuckets(bucket_fields=['id'])) |
| 211 self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs) | 202 self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs) |
| 212 | 203 |
| 213 def testWildcardBucketAndObjectUri(self): | 204 def testWildcardBucketAndObjectUri(self): |
| 214 """Tests matching with both bucket and object wildcards""" | 205 """Tests matching with both bucket and object wildcards.""" |
| 215 exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name( | 206 exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name( |
| 216 'abcd'))]) | 207 'abcd'))]) |
| 217 actual_obj_uri_strs = set( | 208 actual_obj_uri_strs = set( |
| 218 str(u) for u in self._test_wildcard_iterator( | 209 str(u) for u in self._test_wildcard_iterator( |
| 219 '%s0*/abc*' % self.base_uri_str).IterUris()) | 210 '%s0*/abc*' % self.base_uri_str).IterAll( |
| 211 expand_top_level_buckets=True)) |
| 220 self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs) | 212 self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs) |
| 221 | 213 |
| 222 def testWildcardUpToFinalCharSubdirPlusObjectName(self): | 214 def testWildcardUpToFinalCharSubdirPlusObjectName(self): |
| 223 """Tests wildcard subd*r/obj name""" | 215 """Tests wildcard subd*r/obj name.""" |
| 224 exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name( | 216 exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name( |
| 225 'nested1/nested2/xyz1'))]) | 217 'nested1/nested2/xyz1'))]) |
| 226 actual_obj_uri_strs = set( | 218 actual_obj_uri_strs = set( |
| 227 str(u) for u in self._test_wildcard_iterator( | 219 str(u) for u in self._test_wildcard_iterator( |
| 228 '%snested1/nest*2/xyz1' % self.test_bucket0_uri.uri).IterUris()) | 220 '%snested1/nest*2/xyz1' % self.test_bucket0_uri.uri).IterAll( |
| 221 expand_top_level_buckets=True)) |
| 229 self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs) | 222 self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs) |
| 230 | 223 |
| 231 def testPostRecursiveWildcard(self): | 224 def testPostRecursiveWildcard(self): |
| 232 """Tests that wildcard containing ** followed by an additional wildcard works""" | 225 """Tests wildcard containing ** followed by an additional wildcard.""" |
| 233 exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name( | 226 exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name( |
| 234 'nested1/nested2/xyz2'))]) | 227 'nested1/nested2/xyz2'))]) |
| 235 actual_obj_uri_strs = set( | 228 actual_obj_uri_strs = set( |
| 236 str(u) for u in self._test_wildcard_iterator( | 229 str(u) for u in self._test_wildcard_iterator( |
| 237 '%s**/*y*2' % self.test_bucket0_uri.uri).IterUris()) | 230 '%s**/*y*2' % self.test_bucket0_uri.uri).IterAll( |
| 231 expand_top_level_buckets=True)) |
| 238 self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs) | 232 self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs) |
| 239 | 233 |
| 240 def testCallingGetKeyOnProviderOnlyWildcardIteration(self): | 234 def testWildcardFields(self): |
| 241 """Tests that attempting iterating provider-only wildcard raises""" | 235 """Tests that wildcard w/fields specification returns correct fields.""" |
| 242 try: | 236 blrs = set( |
| 243 from gslib.bucket_listing_ref import BucketListingRefException | 237 u for u in self._test_wildcard_iterator( |
| 244 for iter_result in wildcard_iterator.wildcard_iterator( | 238 self.test_bucket0_uri.clone_replace_name('**')).IterAll( |
| 245 'gs://', ProjectIdHandler(), | 239 bucket_listing_fields=['updated'])) |
| 246 bucket_storage_uri_class=self.mock_bucket_storage_uri): | 240 self.assertTrue(len(blrs)) |
| 247 iter_result.GetKey() | 241 for blr in blrs: |
| 248 self.fail('Expected BucketListingRefException not raised.') | 242 self.assertTrue(blr.root_object and blr.root_object.updated) |
| 249 except BucketListingRefException, e: | 243 blrs = set( |
| 250 self.assertTrue(str(e).find( | 244 u for u in self._test_wildcard_iterator( |
| 251 'Attempt to call GetKey() on Key-less BucketListingRef') != -1) | 245 self.test_bucket0_uri.clone_replace_name('**')).IterAll( |
| 246 bucket_listing_fields=['generation'])) |
| 247 self.assertTrue(len(blrs)) |
| 248 for blr in blrs: |
| 249 self.assertTrue(blr.root_object and not blr.root_object.updated) |
| 252 | 250 |
| 253 | 251 |
| 254 class FileIteratorTests(testcase.GsUtilUnitTestCase): | 252 class FileIteratorTests(testcase.GsUtilUnitTestCase): |
| 255 """FileWildcardIterator test suite""" | 253 """Unit tests for FileWildcardIterator.""" |
| 256 | 254 |
| 257 def setUp(self): | 255 def setUp(self): |
| 258 """ | 256 """Creates a test dir with 3 files and one nested subdirectory + file.""" |
| 259 Creates a test dir containing 3 files and one nested subdirectory + file. | |
| 260 """ | |
| 261 super(FileIteratorTests, self).setUp() | 257 super(FileIteratorTests, self).setUp() |
| 262 | 258 |
| 263 self.test_dir = self.CreateTempDir(test_files=[ | 259 self.test_dir = self.CreateTempDir(test_files=[ |
| 264 'abcd', 'abdd', 'ade$', ('dir1', 'dir2', 'zzz')]) | 260 'abcd', 'abdd', 'ade$', ('dir1', 'dir2', 'zzz')]) |
| 265 | 261 |
| 266 self.root_files_uri_strs = set([ | 262 self.root_files_uri_strs = set([ |
| 267 suri(self.test_dir, 'abcd'), | 263 suri(self.test_dir, 'abcd'), |
| 268 suri(self.test_dir, 'abdd'), | 264 suri(self.test_dir, 'abdd'), |
| 269 suri(self.test_dir, 'ade$')]) | 265 suri(self.test_dir, 'ade$')]) |
| 270 | 266 |
| 271 self.subdirs_uri_strs = set([suri(self.test_dir, 'dir1')]) | 267 self.subdirs_uri_strs = set([suri(self.test_dir, 'dir1')]) |
| 272 | 268 |
| 273 self.nested_files_uri_strs = set([ | 269 self.nested_files_uri_strs = set([ |
| 274 suri(self.test_dir, 'dir1', 'dir2', 'zzz')]) | 270 suri(self.test_dir, 'dir1', 'dir2', 'zzz')]) |
| 275 | 271 |
| 276 self.immed_child_uri_strs = self.root_files_uri_strs | self.subdirs_uri_strs | 272 self.immed_child_uri_strs = self.root_files_uri_strs | self.subdirs_uri_strs |
| 277 self.all_file_uri_strs = ( | 273 self.all_file_uri_strs = ( |
| 278 self.root_files_uri_strs | self.nested_files_uri_strs) | 274 self.root_files_uri_strs | self.nested_files_uri_strs) |
| 279 | 275 |
| 280 def testContainsWildcard(self): | 276 def testContainsWildcard(self): |
| 281 """Tests ContainsWildcard call""" | 277 """Tests ContainsWildcard call.""" |
| 282 self.assertTrue(ContainsWildcard('a*.txt')) | 278 self.assertTrue(ContainsWildcard('a*.txt')) |
| 283 self.assertTrue(ContainsWildcard('a[0-9].txt')) | 279 self.assertTrue(ContainsWildcard('a[0-9].txt')) |
| 284 self.assertFalse(ContainsWildcard('0-9.txt')) | 280 self.assertFalse(ContainsWildcard('0-9.txt')) |
| 285 self.assertTrue(ContainsWildcard('?.txt')) | 281 self.assertTrue(ContainsWildcard('?.txt')) |
| 286 | 282 |
| 287 def testNoOpDirectoryIterator(self): | 283 def testNoOpDirectoryIterator(self): |
| 288 """Tests that directory-only URI iterates just that one URI""" | 284 """Tests that directory-only URI iterates just that one URI.""" |
| 289 results = list( | 285 results = list( |
| 290 self._test_wildcard_iterator(suri(tempfile.tempdir)).IterUris()) | 286 self._test_wildcard_iterator(suri(tempfile.tempdir)).IterAll( |
| 287 expand_top_level_buckets=True)) |
| 291 self.assertEqual(1, len(results)) | 288 self.assertEqual(1, len(results)) |
| 292 self.assertEqual(suri(tempfile.tempdir), str(results[0])) | 289 self.assertEqual(suri(tempfile.tempdir), str(results[0])) |
| 293 | 290 |
| 294 def testMatchingAllFiles(self): | 291 def testMatchingAllFiles(self): |
| 295 """Tests matching all files, based on wildcard""" | 292 """Tests matching all files, based on wildcard.""" |
| 296 uri = self._test_storage_uri(suri(self.test_dir, '*')) | 293 uri = self._test_storage_uri(suri(self.test_dir, '*')) |
| 297 actual_uri_strs = set(str(u) for u in | 294 actual_uri_strs = set(str(u) for u in |
| 298 self._test_wildcard_iterator(uri).IterUris() | 295 self._test_wildcard_iterator(uri).IterAll( |
| 299 ) | 296 expand_top_level_buckets=True)) |
| 300 self.assertEqual(self.immed_child_uri_strs, actual_uri_strs) | 297 self.assertEqual(self.immed_child_uri_strs, actual_uri_strs) |
| 301 | 298 |
| 302 def testMatchingFileSubset(self): | 299 def testMatchingFileSubset(self): |
| 303 """Tests matching a subset of files, based on wildcard""" | 300 """Tests matching a subset of files, based on wildcard.""" |
| 304 exp_uri_strs = set( | 301 exp_uri_strs = set( |
| 305 [suri(self.test_dir, 'abcd'), suri(self.test_dir, 'abdd')]) | 302 [suri(self.test_dir, 'abcd'), suri(self.test_dir, 'abdd')]) |
| 306 uri = self._test_storage_uri(suri(self.test_dir, 'ab??')) | 303 uri = self._test_storage_uri(suri(self.test_dir, 'ab??')) |
| 307 actual_uri_strs = set(str(u) for u in | 304 actual_uri_strs = set(str(u) for u in |
| 308 self._test_wildcard_iterator(uri).IterUris() | 305 self._test_wildcard_iterator(uri).IterAll( |
| 309 ) | 306 expand_top_level_buckets=True)) |
| 310 self.assertEqual(exp_uri_strs, actual_uri_strs) | 307 self.assertEqual(exp_uri_strs, actual_uri_strs) |
| 311 | 308 |
| 312 def testMatchingNonWildcardedUri(self): | 309 def testMatchingNonWildcardedUri(self): |
| 313 """Tests matching a single named file""" | 310 """Tests matching a single named file.""" |
| 314 exp_uri_strs = set([suri(self.test_dir, 'abcd')]) | 311 exp_uri_strs = set([suri(self.test_dir, 'abcd')]) |
| 315 uri = self._test_storage_uri(suri(self.test_dir, 'abcd')) | 312 uri = self._test_storage_uri(suri(self.test_dir, 'abcd')) |
| 316 actual_uri_strs = set( | 313 actual_uri_strs = set( |
| 317 str(u) for u in self._test_wildcard_iterator(uri).IterUris()) | 314 str(u) for u in self._test_wildcard_iterator(uri).IterAll( |
| 315 expand_top_level_buckets=True)) |
| 318 self.assertEqual(exp_uri_strs, actual_uri_strs) | 316 self.assertEqual(exp_uri_strs, actual_uri_strs) |
| 319 | 317 |
| 320 def testMatchingFilesIgnoringOtherRegexChars(self): | 318 def testMatchingFilesIgnoringOtherRegexChars(self): |
| 321 """Tests ignoring non-wildcard regex chars (e.g., ^ and $)""" | 319 """Tests ignoring non-wildcard regex chars (e.g., ^ and $).""" |
| 322 | 320 |
| 323 exp_uri_strs = set([suri(self.test_dir, 'ade$')]) | 321 exp_uri_strs = set([suri(self.test_dir, 'ade$')]) |
| 324 uri = self._test_storage_uri(suri(self.test_dir, 'ad*$')) | 322 uri = self._test_storage_uri(suri(self.test_dir, 'ad*$')) |
| 325 actual_uri_strs = set( | 323 actual_uri_strs = set( |
| 326 str(u) for u in self._test_wildcard_iterator(uri).IterUris()) | 324 str(u) for u in self._test_wildcard_iterator(uri).IterAll( |
| 325 expand_top_level_buckets=True)) |
| 327 self.assertEqual(exp_uri_strs, actual_uri_strs) | 326 self.assertEqual(exp_uri_strs, actual_uri_strs) |
| 328 | 327 |
| 329 def testRecursiveDirectoryOnlyWildcarding(self): | 328 def testRecursiveDirectoryOnlyWildcarding(self): |
| 330 """Tests recursive expansion of directory-only '**' wildcard""" | 329 """Tests recursive expansion of directory-only '**' wildcard.""" |
| 331 uri = self._test_storage_uri(suri(self.test_dir, '**')) | 330 uri = self._test_storage_uri(suri(self.test_dir, '**')) |
| 332 actual_uri_strs = set( | 331 actual_uri_strs = set( |
| 333 str(u) for u in self._test_wildcard_iterator(uri).IterUris()) | 332 str(u) for u in self._test_wildcard_iterator(uri).IterAll( |
| 333 expand_top_level_buckets=True)) |
| 334 self.assertEqual(self.all_file_uri_strs, actual_uri_strs) | 334 self.assertEqual(self.all_file_uri_strs, actual_uri_strs) |
| 335 | 335 |
| 336 def testRecursiveDirectoryPlusFileWildcarding(self): | 336 def testRecursiveDirectoryPlusFileWildcarding(self): |
| 337 """Tests recursive expansion of '**' directory plus '*' wildcard""" | 337 """Tests recursive expansion of '**' directory plus '*' wildcard.""" |
| 338 uri = self._test_storage_uri(suri(self.test_dir, '**', '*')) | 338 uri = self._test_storage_uri(suri(self.test_dir, '**', '*')) |
| 339 actual_uri_strs = set( | 339 actual_uri_strs = set( |
| 340 str(u) for u in self._test_wildcard_iterator(uri).IterUris()) | 340 str(u) for u in self._test_wildcard_iterator(uri).IterAll( |
| 341 expand_top_level_buckets=True)) |
| 341 self.assertEqual(self.all_file_uri_strs, actual_uri_strs) | 342 self.assertEqual(self.all_file_uri_strs, actual_uri_strs) |
| 342 | 343 |
| 343 def testInvalidRecursiveDirectoryWildcard(self): | 344 def testInvalidRecursiveDirectoryWildcard(self): |
| 344 """Tests that wildcard containing '***' raises exception""" | 345 """Tests that wildcard containing '***' raises exception.""" |
| 345 try: | 346 try: |
| 346 uri = self._test_storage_uri(suri(self.test_dir, '***', 'abcd')) | 347 uri = self._test_storage_uri(suri(self.test_dir, '***', 'abcd')) |
| 347 for unused_ in self._test_wildcard_iterator(uri).IterUris(): | 348 for unused_ in self._test_wildcard_iterator(uri).IterAll( |
| 349 expand_top_level_buckets=True): |
| 348 self.fail('Expected WildcardException not raised.') | 350 self.fail('Expected WildcardException not raised.') |
| 349 except wildcard_iterator.WildcardException, e: | 351 except wildcard_iterator.WildcardException, e: |
| 350 # Expected behavior. | 352 # Expected behavior. |
| 351 self.assertTrue(str(e).find('more than 2 consecutive') != -1) | 353 self.assertTrue(str(e).find('more than 2 consecutive') != -1) |
| 352 | 354 |
| 353 def testMissingDir(self): | 355 def testMissingDir(self): |
| 354 """Tests that wildcard gets empty iterator when directory doesn't exist""" | 356 """Tests that wildcard gets empty iterator when directory doesn't exist.""" |
| 355 res = list( | 357 res = list( |
| 356 self._test_wildcard_iterator(suri('no_such_dir', '*')).IterUris()) | 358 self._test_wildcard_iterator(suri('no_such_dir', '*')).IterAll( |
| 359 expand_top_level_buckets=True)) |
| 357 self.assertEqual(0, len(res)) | 360 self.assertEqual(0, len(res)) |
| 358 | 361 |
| 359 def testExistingDirNoFileMatch(self): | 362 def testExistingDirNoFileMatch(self): |
| 360 """Tests that wildcard returns empty iterator when there's no match""" | 363 """Tests that wildcard returns empty iterator when there's no match.""" |
| 361 uri = self._test_storage_uri( | 364 uri = self._test_storage_uri( |
| 362 suri(self.test_dir, 'non_existent*')) | 365 suri(self.test_dir, 'non_existent*')) |
| 363 res = list(self._test_wildcard_iterator(uri).IterUris()) | 366 res = list(self._test_wildcard_iterator(uri).IterAll( |
| 367 expand_top_level_buckets=True)) |
| 364 self.assertEqual(0, len(res)) | 368 self.assertEqual(0, len(res)) |
| OLD | NEW |