Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(131)

Side by Side Diff: third_party/gsutil/gslib/test_wildcard_iterator.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Removed gsutil/tests and gsutil/docs Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 #
3 # Copyright 2010 Google Inc.
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining a
6 # copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish, dis-
9 # tribute, sublicense, and/or sell copies of the Software, and to permit
10 # persons to whom the Software is furnished to do so, subject to the fol-
11 # lowing conditions:
12 #
13 # The above copyright notice and this permission notice shall be included
14 # in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 # IN THE SOFTWARE.
23
24 """Unit tests for gslib wildcard_iterator"""
25
26 import os
27 import shutil
28 import sys
29 import tempfile
30 import time
31 import unittest
32
33 # Put local libs at front of path so tests will run latest lib code rather
34 # than whatever code is found on user's PYTHONPATH.
35 sys.path.insert(0, '.')
36 sys.path.insert(0, 'boto')
37 from boto import InvalidUriError
38 from gslib import test_util
39 from gslib import wildcard_iterator
40 from gslib.project_id import ProjectIdHandler
41 from tests.integration.s3 import mock_storage_service
42 from wildcard_iterator import ContainsWildcard
43
44
class CloudWildcardIteratorTests(unittest.TestCase):
  """Tests wildcard iteration over mock cloud buckets and objects.

  The shared fixture (two buckets with identical object layouts) is built by
  SetUpClass() and removed by TearDownClass(); both must be invoked explicitly
  by whatever runs this suite.
  """

  def GetSuiteDescription(self):
    """Returns the human-readable name of this suite."""
    return 'CloudWildcardIterator test suite'

  @classmethod
  def SetUpClass(cls):
    """Creates 2 mock buckets, each holding the 6 objects in all_obj_names.

    Three objects sit directly under the bucket (immed_child_obj_names) and
    three are nested below the 'nested1/' prefix.
    """
    cls.immed_child_obj_names = ['abcd', 'abdd', 'ade$']
    cls.all_obj_names = ['abcd', 'abdd', 'ade$', 'nested1/nested2/xyz1',
                         'nested1/nested2/xyz2', 'nested1/nfile_abc']
    # The timestamp suffix makes the bucket names differ from run to run.
    cls.base_uri_str = 'gs://gslib_test_%d' % int(time.time())
    cls.test_bucket0_uri, cls.test_bucket0_obj_uri_strs = (
        cls.__SetUpOneMockBucket(0))
    cls.test_bucket1_uri, cls.test_bucket1_obj_uri_strs = (
        cls.__SetUpOneMockBucket(1))
    # Checked by TearDownClass so cleanup is skipped if setup did not finish.
    cls.created_test_data = True

  @classmethod
  def __SetUpOneMockBucket(cls, bucket_num):
    """Creates one mock bucket populated with every name in all_obj_names.

    Args:
      bucket_num: Number appended to base_uri_str to build the bucket name.

    Returns:
      tuple: (bucket URI, set of object URI strings).
    """
    bucket_uri = test_util.test_storage_uri(
        '%s_%s' % (cls.base_uri_str, bucket_num))
    bucket_uri.create_bucket()
    obj_uri_strs = set()
    for obj_name in cls.all_obj_names:
      obj_uri = test_util.test_storage_uri('%s%s' % (bucket_uri, obj_name))
      # Objects only need to exist; their content is irrelevant to the tests.
      obj_uri.new_key().set_contents_from_string('')
      obj_uri_strs.add(str(obj_uri))
    return (bucket_uri, obj_uri_strs)

  @classmethod
  def TearDownClass(cls):
    """Deletes the objects and buckets created by SetUpClass, if any."""
    if not hasattr(cls, 'created_test_data'):
      return
    for obj_uri_strs in (cls.test_bucket0_obj_uri_strs,
                         cls.test_bucket1_obj_uri_strs):
      for obj_uri_str in obj_uri_strs:
        test_util.test_storage_uri(obj_uri_str).delete_key()
    cls.test_bucket0_uri.delete_bucket()
    cls.test_bucket1_uri.delete_bucket()

  def _IterUriStrs(self, uri_or_str):
    """Returns the set of URI strings yielded by IterUris() for uri_or_str.

    Args:
      uri_or_str: Storage URI object or URI string handed to
          test_util.test_wildcard_iterator.
    """
    return set(
        str(u) for u in
        test_util.test_wildcard_iterator(uri_or_str).IterUris())

  def _CollectPrefixesAndUris(self, object_name_pattern):
    """Iterates test bucket 0 with a pattern and splits the listing results.

    Args:
      object_name_pattern: Object-name wildcard applied within test bucket 0.

    Returns:
      tuple: (set of prefix names, set of object URI strings).
    """
    prefixes = set()
    uri_strs = set()
    for blr in test_util.test_wildcard_iterator(
        self.test_bucket0_uri.clone_replace_name(object_name_pattern)):
      if blr.HasPrefix():
        prefixes.add(blr.GetPrefix().name)
      else:
        uri_strs.add(blr.GetUri().uri)
    return (prefixes, uri_strs)

  def TestNoOpObjectIterator(self):
    """Tests that bucket-only URI iterates just that one URI"""
    results = list(
        test_util.test_wildcard_iterator(self.test_bucket0_uri).IterUris())
    self.assertEqual(1, len(results))
    self.assertEqual(str(self.test_bucket0_uri), str(results[0]))

  def TestMatchingAllObjects(self):
    """Tests matching all objects, based on wildcard"""
    actual_obj_uri_strs = self._IterUriStrs(
        self.test_bucket0_uri.clone_replace_name('**'))
    self.assertEqual(self.test_bucket0_obj_uri_strs, actual_obj_uri_strs)

  def TestMatchingObjectSubset(self):
    """Tests matching a subset of objects, based on wildcard"""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd')),
         str(self.test_bucket0_uri.clone_replace_name('abdd'))])
    actual_obj_uri_strs = self._IterUriStrs(
        self.test_bucket0_uri.clone_replace_name('ab??'))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def TestMatchingNonWildcardedUri(self):
    """Tests matching a single named object"""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd'))])
    actual_obj_uri_strs = self._IterUriStrs(
        self.test_bucket0_uri.clone_replace_name('abcd'))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def TestWildcardedObjectUriWithVsWithoutPrefix(self):
    """Tests that wildcarding w/ and w/o server prefix get same result"""
    # (It's just more efficient to query w/o a prefix; wildcard
    # iterator will filter the matches either way.)
    with_prefix_uri_strs = self._IterUriStrs(
        self.test_bucket0_uri.clone_replace_name('abcd'))
    # By including a wildcard at the start of the string no prefix can be
    # used in server request.
    no_prefix_uri_strs = self._IterUriStrs(
        self.test_bucket0_uri.clone_replace_name('?bcd'))
    self.assertEqual(with_prefix_uri_strs, no_prefix_uri_strs)

  def TestWildcardedObjectUriNestedSubdirMatch(self):
    """Tests wildcarding with a nested subdir"""
    prefixes, uri_strs = self._CollectPrefixesAndUris('*')
    exp_obj_uri_strs = set(['%s_0/%s' % (self.base_uri_str, x)
                            for x in self.immed_child_obj_names])
    self.assertEqual(exp_obj_uri_strs, uri_strs)
    self.assertEqual(1, len(prefixes))
    self.assertTrue('nested1/' in prefixes)

  def TestWildcardedObjectUriNestedSubSubdirMatch(self):
    """Tests wildcarding with a nested sub-subdir"""
    # The pattern is tried both with and without a trailing slash.
    for final_char in ('', '/'):
      prefixes, uri_strs = self._CollectPrefixesAndUris(
          'nested1/*%s' % final_char)
      self.assertEqual(1, len(uri_strs))
      self.assertEqual(1, len(prefixes))
      self.assertTrue('nested1/nested2/' in prefixes)

  def TestWildcardPlusSubdirMatch(self):
    """Tests gs://bucket/*/subdir matching"""
    actual_prefixes, actual_uri_strs = self._CollectPrefixesAndUris(
        '*/nested1')
    self.assertEqual(set(['nested1/']), actual_prefixes)
    self.assertEqual(set(), actual_uri_strs)

  def TestWildcardPlusSubdirSubdirMatch(self):
    """Tests gs://bucket/*/subdir/* matching"""
    actual_prefixes, actual_uri_strs = self._CollectPrefixesAndUris(
        '*/nested2/*')
    expected_uri_strs = set([
        self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz1').uri,
        self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz2').uri])
    self.assertEqual(set(), actual_prefixes)
    self.assertEqual(expected_uri_strs, actual_uri_strs)

  def TestNoMatchingWildcardedObjectUri(self):
    """Tests that get back an empty iterator for non-matching wildcarded URI"""
    res = list(test_util.test_wildcard_iterator(
        self.test_bucket0_uri.clone_replace_name('*x0')).IterUris())
    self.assertEqual(0, len(res))

  def TestWildcardedInvalidObjectUri(self):
    """Tests that we raise an exception for wildcarded invalid URI"""
    try:
      for unused_ in test_util.test_wildcard_iterator(
          'badscheme://asdf').IterUris():
        pass
      # Reached only if nothing was raised, whether or not anything was
      # yielded; failing here keeps the test from passing vacuously.
      self.fail('Expected InvalidUriError not raised.')
    except InvalidUriError:
      # sys.exc_info() is used instead of "except X, e" so the clause parses
      # on Python 2.5 as well as on later interpreters.
      e = sys.exc_info()[1]
      self.assertTrue('Unrecognized scheme' in e.message)

  def TestSingleMatchWildcardedBucketUri(self):
    """Tests matching a single bucket based on a wildcarded bucket URI"""
    exp_obj_uri_strs = set(['%s_1/' % self.base_uri_str])
    actual_obj_uri_strs = self._IterUriStrs('%s*1' % self.base_uri_str)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def TestMultiMatchWildcardedBucketUri(self):
    """Tests matching multiple buckets based on a wildcarded bucket URI"""
    exp_obj_uri_strs = set(
        ['%s_%s/' % (self.base_uri_str, i) for i in range(2)])
    actual_obj_uri_strs = self._IterUriStrs('%s*' % self.base_uri_str)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def TestWildcardBucketAndObjectUri(self):
    """Tests matching with both bucket and object wildcards"""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd'))])
    actual_obj_uri_strs = self._IterUriStrs(
        '%s_0*/abc*' % self.base_uri_str)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def TestWildcardUpToFinalCharSubdirPlusObjectName(self):
    """Tests wildcard subd*r/obj name"""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name(
            'nested1/nested2/xyz1'))])
    actual_obj_uri_strs = self._IterUriStrs(
        '%snested1/nest*2/xyz1' % self.test_bucket0_uri.uri)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def TestPostRecursiveWildcard(self):
    """Tests that a wildcard containing ** followed by another wildcard works"""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name(
            'nested1/nested2/xyz2'))])
    actual_obj_uri_strs = self._IterUriStrs(
        '%s**/*y*2' % self.test_bucket0_uri.uri)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def TestCallingGetKeyOnProviderOnlyWildcardIteration(self):
    """Tests that GetKey() on a provider-only wildcard result raises"""
    # Imported before the try block: if the import itself failed inside it,
    # evaluating the except clause would hide the ImportError behind a
    # NameError for the unbound exception class.
    from gslib.bucket_listing_ref import BucketListingRefException
    try:
      for iter_result in wildcard_iterator.wildcard_iterator(
          'gs://', ProjectIdHandler(),
          bucket_storage_uri_class=mock_storage_service.MockBucketStorageUri):
        iter_result.GetKey()
        self.fail('Expected BucketListingRefException not raised.')
    except BucketListingRefException:
      e = sys.exc_info()[1]
      self.assertTrue(
          'Attempt to call GetKey() on Key-less BucketListingRef' in str(e))
279
280
class FileIteratorTests(unittest.TestCase):
  """Tests wildcard iteration over file:// URIs in a temporary directory.

  The directory tree is built by SetUpClass() and removed by TearDownClass();
  both must be invoked explicitly by whatever runs this suite.
  """

  def GetSuiteDescription(self):
    """Returns the human-readable name of this suite."""
    return 'FileWildcardIterator test suite'

  @classmethod
  def SetUpClass(cls):
    """Creates a test dir containing 3 files and one nested subdir + file.

    Resulting layout under cls.test_dir:
      abcd, abdd, ade$      (empty files)
      dir1/dir2/zzz         (empty file)
    """
    # Create the test directories.
    cls.test_dir = tempfile.mkdtemp()
    nested_subdir = os.path.join(cls.test_dir, 'dir1', 'dir2')
    os.makedirs(nested_subdir)

    # Create the test files.
    immed_child_filenames = ['abcd', 'abdd', 'ade$', 'dir1']
    immed_child_filepaths = [os.path.join(cls.test_dir, f)
                             for f in immed_child_filenames]
    filenames = ['abcd', 'abdd', 'ade$', os.path.join('dir1', 'dir2', 'zzz')]
    filepaths = [os.path.join(cls.test_dir, f) for f in filenames]
    for filepath in filepaths:
      # Close immediately so no file handles stay open while the tests run
      # (and so the tree can be removed afterwards on every platform).
      open(filepath, 'w').close()

    # Set up the expectations shared by the tests.
    cls.immed_child_uri_strs = set(
        'file://%s' % f for f in immed_child_filepaths)
    cls.all_file_uri_strs = set('file://%s' % f for f in filepaths)
    cls.all_uri_strs = set(
        ['file://%s' % nested_subdir]).union(cls.all_file_uri_strs)

  @classmethod
  def TearDownClass(cls):
    """Removes the test dir created by SetUpClass, if any."""
    if hasattr(cls, 'test_dir'):
      shutil.rmtree(cls.test_dir)

  def _IterUriStrs(self, uri_str):
    """Returns the set of URI strings yielded by IterUris() for uri_str.

    Args:
      uri_str: file:// URI string, converted with test_util.test_storage_uri
          before being handed to test_util.test_wildcard_iterator.
    """
    uri = test_util.test_storage_uri(uri_str)
    return set(
        str(u) for u in test_util.test_wildcard_iterator(uri).IterUris())

  def TestContainsWildcard(self):
    """Tests ContainsWildcard call"""
    self.assertTrue(ContainsWildcard('a*.txt'))
    self.assertTrue(ContainsWildcard('a[0-9].txt'))
    self.assertFalse(ContainsWildcard('0-9.txt'))
    self.assertTrue(ContainsWildcard('?.txt'))

  def TestNoOpDirectoryIterator(self):
    """Tests that directory-only URI iterates just that one URI"""
    # NOTE(review): the path is hard-coded rather than derived from test_dir.
    results = list(test_util.test_wildcard_iterator('file:///tmp/').IterUris())
    self.assertEqual(1, len(results))
    self.assertEqual('file:///tmp/', str(results[0]))

  def TestMatchingAllFiles(self):
    """Tests matching all files, based on wildcard"""
    actual_uri_strs = self._IterUriStrs('file://%s/*' % self.test_dir)
    self.assertEqual(self.immed_child_uri_strs, actual_uri_strs)

  def TestMatchingFileSubset(self):
    """Tests matching a subset of files, based on wildcard"""
    exp_uri_strs = set(
        ['file://%s/abcd' % self.test_dir, 'file://%s/abdd' % self.test_dir])
    actual_uri_strs = self._IterUriStrs('file://%s/ab??' % self.test_dir)
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def TestMatchingNonWildcardedUri(self):
    """Tests matching a single named file"""
    exp_uri_strs = set(['file://%s/abcd' % self.test_dir])
    actual_uri_strs = self._IterUriStrs('file://%s/abcd' % self.test_dir)
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def TestMatchingFilesIgnoringOtherRegexChars(self):
    """Tests ignoring non-wildcard regex chars (e.g., ^ and $)"""
    exp_uri_strs = set(['file://%s/ade$' % self.test_dir])
    actual_uri_strs = self._IterUriStrs('file://%s/ad*$' % self.test_dir)
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def TestRecursiveDirectoryOnlyWildcarding(self):
    """Tests recursive expansion of directory-only '**' wildcard"""
    actual_uri_strs = self._IterUriStrs('file://%s/**' % self.test_dir)
    self.assertEqual(self.all_file_uri_strs, actual_uri_strs)

  def TestRecursiveDirectoryPlusFileWildcarding(self):
    """Tests recursive expansion of '**' directory plus '*' wildcard"""
    actual_uri_strs = self._IterUriStrs('file://%s/**/*' % self.test_dir)
    self.assertEqual(self.all_file_uri_strs, actual_uri_strs)

  def TestInvalidRecursiveDirectoryWildcard(self):
    """Tests that wildcard containing '***' raises exception"""
    try:
      uri = test_util.test_storage_uri('file://%s/***/abcd' % self.test_dir)
      for unused_ in test_util.test_wildcard_iterator(uri).IterUris():
        pass
      # Reached only if nothing was raised, whether or not anything was
      # yielded; failing here keeps the test from passing vacuously.
      self.fail('Expected WildcardException not raised.')
    except wildcard_iterator.WildcardException:
      # sys.exc_info() is used instead of "except X, e" so the clause parses
      # on Python 2.5 as well as on later interpreters.
      e = sys.exc_info()[1]
      self.assertTrue('more than 2 consecutive' in str(e))

  def TestMissingDir(self):
    """Tests that wildcard gets empty iterator when directory doesn't exist"""
    res = list(
        test_util.test_wildcard_iterator('file://no_such_dir/*').IterUris())
    self.assertEqual(0, len(res))

  def TestExistingDirNoFileMatch(self):
    """Tests that wildcard returns empty iterator when there's no match"""
    uri = test_util.test_storage_uri(
        'file://%s/non_existent*' % self.test_dir)
    res = list(test_util.test_wildcard_iterator(uri).IterUris())
    self.assertEqual(0, len(res))
411
412
if __name__ == '__main__':
  if sys.version_info[:3] < (2, 5, 1):
    sys.exit('These tests must be run on at least Python 2.5.1\n')
  test_loader = unittest.TestLoader()
  # Test methods in this file are named TestXxx, so replace the loader's
  # default method prefix.
  test_loader.testMethodPrefix = 'Test'
  all_passed = True
  for suite in (test_loader.loadTestsFromTestCase(CloudWildcardIteratorTests),
                test_loader.loadTestsFromTestCase(FileIteratorTests)):
    # Any test instance from the suite gives access to the suite description
    # and to the class-level fixture hooks.
    test_case = list(suite)[0]
    # We call SetUpClass() and TearDownClass() ourselves because we
    # don't assume the user has Python 2.7 (which supports classmethods
    # that do it, with camelCase versions of these names).
    try:
      print('Setting up %s...' % test_case.GetSuiteDescription())
      test_case.SetUpClass()
      print('Running %s...' % test_case.GetSuiteDescription())
      result = unittest.TextTestRunner(verbosity=2).run(suite)
      if not result.wasSuccessful():
        all_passed = False
    finally:
      # Runs even when setup or the run itself raised, so partially created
      # fixtures are still cleaned up.
      print('Cleaning up after %s...' % test_case.GetSuiteDescription())
      test_case.TearDownClass()
    print('')
  # Report failures through the exit status so callers can detect them.
  if not all_passed:
    sys.exit(1)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698