OLD | NEW |
| (Empty) |
1 # -*- coding: utf-8 -*- | |
2 # Copyright 2010 Google Inc. All Rights Reserved. | |
3 # | |
4 # Permission is hereby granted, free of charge, to any person obtaining a | |
5 # copy of this software and associated documentation files (the | |
6 # "Software"), to deal in the Software without restriction, including | |
7 # without limitation the rights to use, copy, modify, merge, publish, dis- | |
8 # tribute, sublicense, and/or sell copies of the Software, and to permit | |
9 # persons to whom the Software is furnished to do so, subject to the fol- | |
10 # lowing conditions: | |
11 # | |
12 # The above copyright notice and this permission notice shall be included | |
13 # in all copies or substantial portions of the Software. | |
14 # | |
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |
16 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | |
17 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | |
18 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
19 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
21 # IN THE SOFTWARE. | |
22 """Unit tests for gsutil wildcard_iterator.""" | |
23 | |
24 from __future__ import absolute_import | |
25 | |
26 import tempfile | |
27 | |
28 from gslib import wildcard_iterator | |
29 from gslib.exception import InvalidUrlError | |
30 from gslib.storage_url import ContainsWildcard | |
31 import gslib.tests.testcase as testcase | |
32 from gslib.tests.util import ObjectToURI as suri | |
33 | |
34 | |
class CloudWildcardIteratorTests(testcase.GsUtilUnitTestCase):
  """Unit tests for CloudWildcardIterator."""

  def setUp(self):
    """Creates 2 mock buckets, each holding the same 6 objects (3 nested)."""
    super(CloudWildcardIteratorTests, self).setUp()
    self.immed_child_obj_names = ['abcd', 'abdd', 'ade$']
    self.all_obj_names = ['abcd', 'abdd', 'ade$', 'nested1/nested2/xyz1',
                          'nested1/nested2/xyz2', 'nested1/nfile_abc']

    self.base_bucket_uri = self.CreateBucket()
    # Both test buckets are named <prefix>0 and <prefix>1 so that a single
    # wildcarded bucket URL can match them. The base name is cut to 61
    # characters before '_' and the digit are appended (presumably to stay
    # within a bucket-name length limit -- confirm against the service).
    self.prefix_bucket_name = '%s_' % self.base_bucket_uri.bucket_name[:61]
    self.base_uri_str = suri(self.base_bucket_uri).replace(
        self.base_bucket_uri.bucket_name, self.prefix_bucket_name)

    self.test_bucket0_uri = self.CreateBucket(
        bucket_name='%s0' % self.prefix_bucket_name)
    self.test_bucket0_obj_uri_strs = self._CreateEmptyObjects(
        self.test_bucket0_uri)

    self.test_bucket1_uri = self.CreateBucket(
        bucket_name='%s1' % self.prefix_bucket_name)
    self.test_bucket1_obj_uri_strs = self._CreateEmptyObjects(
        self.test_bucket1_uri)

  def _CreateEmptyObjects(self, bucket_uri):
    """Creates one empty object per name in self.all_obj_names.

    Args:
      bucket_uri: URI of the bucket that receives the objects.

    Returns:
      Set of URI strings (as produced by suri) of the created objects.
    """
    return set(
        suri(self.CreateObject(bucket_uri=bucket_uri, object_name=obj_name,
                               contents=''))
        for obj_name in self.all_obj_names)

  def _ExpandedUrlStrs(self, wildcard):
    """Expands a wildcard with IterAll and stringifies every result.

    Args:
      wildcard: URI object or URL string handed to _test_wildcard_iterator.

    Returns:
      Set of str() values of everything IterAll yields with
      expand_top_level_buckets=True.
    """
    return set(
        str(u) for u in self._test_wildcard_iterator(wildcard).IterAll(
            expand_top_level_buckets=True))

  def _SplitListing(self, wildcard):
    """Iterates a wildcard directly and separates prefixes from objects.

    Args:
      wildcard: URI object or URL string handed to _test_wildcard_iterator.

    Returns:
      (prefixes, url_strs) tuple: the root_object of every result for which
      IsPrefix() is true, and the url_string of every other result.
    """
    prefixes = set()
    url_strs = set()
    for blr in self._test_wildcard_iterator(wildcard):
      if blr.IsPrefix():
        prefixes.add(blr.root_object)
      else:
        url_strs.add(blr.url_string)
    return prefixes, url_strs

  def testNoOpObjectIterator(self):
    """Tests that bucket-only URI iterates just that one URI."""
    results = list(
        self._test_wildcard_iterator(self.test_bucket0_uri).IterBuckets(
            bucket_fields=['id']))
    self.assertEqual(1, len(results))
    self.assertEqual(str(self.test_bucket0_uri), str(results[0]))

  def testMatchingAllObjects(self):
    """Tests matching all objects, based on wildcard."""
    actual_obj_uri_strs = self._ExpandedUrlStrs(
        self.test_bucket0_uri.clone_replace_name('**'))
    self.assertEqual(self.test_bucket0_obj_uri_strs, actual_obj_uri_strs)

  def testMatchingObjectSubset(self):
    """Tests matching a subset of objects, based on wildcard."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd')),
         str(self.test_bucket0_uri.clone_replace_name('abdd'))])
    actual_obj_uri_strs = self._ExpandedUrlStrs(
        self.test_bucket0_uri.clone_replace_name('ab??'))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testMatchingNonWildcardedUri(self):
    """Tests matching a single named object."""
    abcd_uri = self.test_bucket0_uri.clone_replace_name('abcd')
    exp_obj_uri_strs = set([str(abcd_uri)])
    actual_obj_uri_strs = self._ExpandedUrlStrs(
        self.test_bucket0_uri.clone_replace_name('abcd'))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardedObjectUriWithVsWithoutPrefix(self):
    """Tests that wildcarding w/ and w/o server prefix get same result."""
    # (It's just more efficient to query w/o a prefix; wildcard
    # iterator will filter the matches either way.)
    with_prefix_uri_strs = self._ExpandedUrlStrs(
        self.test_bucket0_uri.clone_replace_name('abcd'))
    # By including a wildcard at the start of the string no prefix can be
    # used in server request.
    no_prefix_uri_strs = self._ExpandedUrlStrs(
        self.test_bucket0_uri.clone_replace_name('?bcd'))
    self.assertEqual(with_prefix_uri_strs, no_prefix_uri_strs)

  def testWildcardedObjectUriNestedSubdirMatch(self):
    """Tests wildcarding with a nested subdir."""
    prefixes, uri_strs = self._SplitListing(
        self.test_bucket0_uri.clone_replace_name('*'))
    exp_obj_uri_strs = set([suri(self.test_bucket0_uri, x)
                            for x in self.immed_child_obj_names])
    self.assertEqual(exp_obj_uri_strs, uri_strs)
    # The three nested objects must collapse into a single prefix result.
    self.assertEqual(1, len(prefixes))
    self.assertTrue('nested1/' in prefixes)

  def testWildcardPlusSubdirMatch(self):
    """Tests gs://bucket/*/subdir matching."""
    actual_prefixes, actual_uri_strs = self._SplitListing(
        self.test_bucket0_uri.clone_replace_name('*/nested1'))
    expected_uri_strs = set()
    expected_prefixes = set(['nested1/'])
    self.assertEqual(expected_prefixes, actual_prefixes)
    self.assertEqual(expected_uri_strs, actual_uri_strs)

  def testWildcardPlusSubdirSubdirMatch(self):
    """Tests gs://bucket/*/subdir/* matching."""
    actual_prefixes, actual_uri_strs = self._SplitListing(
        self.test_bucket0_uri.clone_replace_name('*/nested2/*'))
    expected_uri_strs = set([
        self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz1').uri,
        self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz2').uri])
    expected_prefixes = set()
    self.assertEqual(expected_prefixes, actual_prefixes)
    self.assertEqual(expected_uri_strs, actual_uri_strs)

  def testNoMatchingWildcardedObjectUri(self):
    """Tests that get back an empty iterator for non-matching wildcarded URI."""
    res = list(self._test_wildcard_iterator(
        self.test_bucket0_uri.clone_replace_name('*x0')).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(0, len(res))

  def testWildcardedInvalidObjectUri(self):
    """Tests that we raise an exception for wildcarded invalid URI."""
    not_raised_msg = 'Expected InvalidUrlError not raised.'
    try:
      for unused_ in self._test_wildcard_iterator(
          'badscheme://asdf').IterAll(expand_top_level_buckets=True):
        self.fail(not_raised_msg)
    except InvalidUrlError as e:
      # Expected behavior.
      self.assertTrue(e.message.find('Unrecognized scheme') != -1)
    else:
      # An iterator that yields nothing and raises nothing must not let the
      # test pass vacuously.
      self.fail(not_raised_msg)

  def testSingleMatchWildcardedBucketUri(self):
    """Tests matching a single bucket based on a wildcarded bucket URI."""
    exp_obj_uri_strs = set([
        suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            '%s*1' % self.base_uri_str).IterBuckets(bucket_fields=['id']))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testMultiMatchWildcardedBucketUri(self):
    """Tests matching a multiple buckets based on a wildcarded bucket URI."""
    exp_obj_uri_strs = set([
        suri(self.test_bucket0_uri) + self.test_bucket0_uri.delim,
        suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            '%s*' % self.base_uri_str).IterBuckets(bucket_fields=['id']))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardBucketAndObjectUri(self):
    """Tests matching with both bucket and object wildcards."""
    exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name(
        'abcd'))])
    actual_obj_uri_strs = self._ExpandedUrlStrs(
        '%s0*/abc*' % self.base_uri_str)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardUpToFinalCharSubdirPlusObjectName(self):
    """Tests wildcard subd*r/obj name."""
    exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name(
        'nested1/nested2/xyz1'))])
    actual_obj_uri_strs = self._ExpandedUrlStrs(
        '%snested1/nest*2/xyz1' % self.test_bucket0_uri.uri)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testPostRecursiveWildcard(self):
    """Tests wildcard containing ** followed by an additional wildcard."""
    exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name(
        'nested1/nested2/xyz2'))])
    actual_obj_uri_strs = self._ExpandedUrlStrs(
        '%s**/*y*2' % self.test_bucket0_uri.uri)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardFields(self):
    """Tests that wildcard w/fields specification returns correct fields."""
    # Requesting 'updated' must give every result a populated 'updated'.
    blrs = set(
        u for u in self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('**')).IterAll(
                bucket_listing_fields=['updated']))
    self.assertTrue(len(blrs))
    for blr in blrs:
      self.assertTrue(blr.root_object and blr.root_object.updated)
    # Requesting only 'generation' must leave 'updated' unpopulated.
    blrs = set(
        u for u in self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('**')).IterAll(
                bucket_listing_fields=['generation']))
    self.assertTrue(len(blrs))
    for blr in blrs:
      self.assertTrue(blr.root_object and not blr.root_object.updated)
250 | |
251 | |
class FileIteratorTests(testcase.GsUtilUnitTestCase):
  """Unit tests for FileWildcardIterator."""

  def setUp(self):
    """Creates a test dir with 3 files and one nested subdirectory + file."""
    super(FileIteratorTests, self).setUp()

    self.test_dir = self.CreateTempDir(test_files=[
        'abcd', 'abdd', 'ade$', ('dir1', 'dir2', 'zzz')])

    self.root_files_uri_strs = set([
        suri(self.test_dir, 'abcd'),
        suri(self.test_dir, 'abdd'),
        suri(self.test_dir, 'ade$')])

    self.subdirs_uri_strs = set([suri(self.test_dir, 'dir1')])

    self.nested_files_uri_strs = set([
        suri(self.test_dir, 'dir1', 'dir2', 'zzz')])

    # A single-level '*' sees files and the top-level subdirectory; a
    # recursive '**' sees files only, at every depth.
    self.immed_child_uri_strs = self.root_files_uri_strs | self.subdirs_uri_strs
    self.all_file_uri_strs = (
        self.root_files_uri_strs | self.nested_files_uri_strs)

  def _ExpandInTestDir(self, *path_parts):
    """Expands a wildcard located under self.test_dir.

    Args:
      *path_parts: Path components appended to self.test_dir via suri.

    Returns:
      Set of str() values of everything IterAll yields with
      expand_top_level_buckets=True.
    """
    uri = self._test_storage_uri(suri(self.test_dir, *path_parts))
    return set(
        str(u) for u in self._test_wildcard_iterator(uri).IterAll(
            expand_top_level_buckets=True))

  def testContainsWildcard(self):
    """Tests ContainsWildcard call."""
    self.assertTrue(ContainsWildcard('a*.txt'))
    self.assertTrue(ContainsWildcard('a[0-9].txt'))
    self.assertFalse(ContainsWildcard('0-9.txt'))
    self.assertTrue(ContainsWildcard('?.txt'))

  def testNoOpDirectoryIterator(self):
    """Tests that directory-only URI iterates just that one URI."""
    # Use gettempdir() rather than the tempfile.tempdir global: the global
    # stays None until the tempfile module has had to choose a directory.
    tmp_dir_uri_str = suri(tempfile.gettempdir())
    results = list(
        self._test_wildcard_iterator(tmp_dir_uri_str).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(1, len(results))
    self.assertEqual(tmp_dir_uri_str, str(results[0]))

  def testMatchingAllFiles(self):
    """Tests matching all files, based on wildcard."""
    actual_uri_strs = self._ExpandInTestDir('*')
    self.assertEqual(self.immed_child_uri_strs, actual_uri_strs)

  def testMatchingFileSubset(self):
    """Tests matching a subset of files, based on wildcard."""
    exp_uri_strs = set(
        [suri(self.test_dir, 'abcd'), suri(self.test_dir, 'abdd')])
    actual_uri_strs = self._ExpandInTestDir('ab??')
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testMatchingNonWildcardedUri(self):
    """Tests matching a single named file."""
    exp_uri_strs = set([suri(self.test_dir, 'abcd')])
    actual_uri_strs = self._ExpandInTestDir('abcd')
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testMatchingFilesIgnoringOtherRegexChars(self):
    """Tests ignoring non-wildcard regex chars (e.g., ^ and $)."""
    exp_uri_strs = set([suri(self.test_dir, 'ade$')])
    actual_uri_strs = self._ExpandInTestDir('ad*$')
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testRecursiveDirectoryOnlyWildcarding(self):
    """Tests recursive expansion of directory-only '**' wildcard."""
    actual_uri_strs = self._ExpandInTestDir('**')
    self.assertEqual(self.all_file_uri_strs, actual_uri_strs)

  def testRecursiveDirectoryPlusFileWildcarding(self):
    """Tests recursive expansion of '**' directory plus '*' wildcard."""
    actual_uri_strs = self._ExpandInTestDir('**', '*')
    self.assertEqual(self.all_file_uri_strs, actual_uri_strs)

  def testInvalidRecursiveDirectoryWildcard(self):
    """Tests that wildcard containing '***' raises exception."""
    not_raised_msg = 'Expected WildcardException not raised.'
    try:
      uri = self._test_storage_uri(suri(self.test_dir, '***', 'abcd'))
      for unused_ in self._test_wildcard_iterator(uri).IterAll(
          expand_top_level_buckets=True):
        self.fail(not_raised_msg)
    except wildcard_iterator.WildcardException as e:
      # Expected behavior.
      self.assertTrue(str(e).find('more than 2 consecutive') != -1)
    else:
      # An iterator that yields nothing and raises nothing must not let the
      # test pass vacuously.
      self.fail(not_raised_msg)

  def testMissingDir(self):
    """Tests that wildcard gets empty iterator when directory doesn't exist."""
    res = list(
        self._test_wildcard_iterator(suri('no_such_dir', '*')).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(0, len(res))

  def testExistingDirNoFileMatch(self):
    """Tests that wildcard returns empty iterator when there's no match."""
    uri = self._test_storage_uri(
        suri(self.test_dir, 'non_existent*'))
    res = list(self._test_wildcard_iterator(uri).IterAll(
        expand_top_level_buckets=True))
    self.assertEqual(0, len(res))
OLD | NEW |