Index: tests/download_from_google_storage_unittests.py |
diff --git a/tests/download_from_google_storage_unittests.py b/tests/download_from_google_storage_unittests.py |
index a8af63b0c74cbf9913c396103b8c04a3a0f230c8..0420ad94289def64e0d00836994eddce6cd79047 100755 |
--- a/tests/download_from_google_storage_unittests.py |
+++ b/tests/download_from_google_storage_unittests.py |
@@ -11,6 +11,7 @@ import os |
import Queue |
import shutil |
import sys |
+import tarfile |
import tempfile |
import threading |
import unittest |
@@ -59,6 +60,21 @@ class GsutilMock(object): |
return (0, '', '') |
+class ChangedWorkingDirectory(object): |
+  """Context manager that switches the process cwd and switches back on exit.""" |
+ |
+  def __init__(self, working_directory): |
+    # Directory to change into; the previous cwd is only captured on entry. |
+    self._working_directory = working_directory |
+    self._old_cwd = '' |
+ |
+  def __enter__(self): |
+    destination = self._working_directory |
+    # Remember where we came from before moving, so __exit__ can go back. |
+    self._old_cwd = os.getcwd() |
+    print "Enter directory = ", destination |
+    os.chdir(destination) |
+ |
+  def __exit__(self, *_exc_info): |
+    # Exception details are ignored and nothing is returned, so any exception |
+    # raised inside the with-body keeps propagating. |
+    origin = self._old_cwd |
+    print "Enter directory = ", origin |
+    os.chdir(origin) |
+ |
+ |
class GstoolsUnitTests(unittest.TestCase): |
def setUp(self): |
self.temp_dir = tempfile.mkdtemp(prefix='gstools_test') |
@@ -68,6 +84,49 @@ class GstoolsUnitTests(unittest.TestCase): |
def cleanUp(self): |
shutil.rmtree(self.temp_dir) |
+ def test_validate_tar_file(self): |
+ # Exercises download_from_google_storage._validate_tar_file with one archive |
+ # that is expected to pass and three that are expected to be rejected: a |
+ # symlink member, a member outside the target directory, and a member whose |
+ # path contains '..'. |
+ # NOTE(review): nothing in this method removes the directories and archives |
+ # it creates under self.base_path -- confirm cleanup happens elsewhere. |
+ lorem_ipsum = os.path.join(self.base_path, 'lorem_ipsum.txt') |
+ # The relative paths used below are resolved against self.base_path. |
+ with ChangedWorkingDirectory(self.base_path): |
+ # Sanity check: a regular file inside the target directory is accepted. |
+ tar_dir = 'ok_dir' |
+ os.makedirs(os.path.join(self.base_path, tar_dir)) |
+ # `tar` starts out as the archive file name and is rebound to the TarFile |
+ # object by the `with ... as tar` below, so the validator receives the |
+ # TarFile object, not the name. |
+ tar = 'good.tar.gz' |
+ lorem_ipsum_copy = os.path.join(tar_dir, 'lorem_ipsum.txt') |
+ shutil.copyfile(lorem_ipsum, lorem_ipsum_copy) |
+ with tarfile.open(tar, 'w:gz') as tar: |
+ tar.add(lorem_ipsum_copy) |
+ self.assertTrue( |
+ download_from_google_storage._validate_tar_file(tar, tar_dir)) |
+ |
+ # An archive whose member is a symlink must be rejected. |
+ # NOTE(review): os.symlink is not available on Windows under Python 2 -- |
+ # confirm this test only runs on POSIX hosts. |
+ tar_dir_link = 'for_tar_link' |
+ os.makedirs(tar_dir_link) |
+ link = os.path.join(tar_dir_link, 'link') |
+ os.symlink(lorem_ipsum, link) |
+ tar_with_links = 'with_links.tar.gz' |
+ with tarfile.open(tar_with_links, 'w:gz') as tar: |
+ tar.add(link) |
+ self.assertFalse( |
+ download_from_google_storage._validate_tar_file(tar, tar_dir_link)) |
+ |
+ # An archive whose member lies outside the target directory must be |
+ # rejected: the file is added via its self.base_path-joined path, not via |
+ # a path under tar_dir_outside. |
+ tar_dir_outside = 'outside_tar' |
+ os.makedirs(tar_dir_outside) |
+ tar_with_outside = 'with_outside.tar.gz' |
+ with tarfile.open(tar_with_outside, 'w:gz') as tar: |
+ tar.add(lorem_ipsum) |
+ self.assertFalse( |
+ download_from_google_storage._validate_tar_file(tar, |
+ tar_dir_outside)) |
+ # An archive whose member path contains '..' must be rejected, even though |
+ # the path points at the same file as the accepted case above. |
+ # NOTE(review): presumably the archived member name keeps the '..' |
+ # component as written -- confirm against the tarfile version in use. |
+ tar_with_dotdot = 'with_dotdot.tar.gz' |
+ dotdot_file = os.path.join(tar_dir, '..', tar_dir, 'lorem_ipsum.txt') |
+ with tarfile.open(tar_with_dotdot, 'w:gz') as tar: |
+ tar.add(dotdot_file) |
+ self.assertFalse( |
+ download_from_google_storage._validate_tar_file(tar, |
+ tar_dir)) |
+ |
def test_gsutil(self): |
gsutil = download_from_google_storage.Gsutil(GSUTIL_DEFAULT_PATH, None) |
self.assertEqual(gsutil.path, GSUTIL_DEFAULT_PATH) |
@@ -164,7 +223,7 @@ class DownloadTests(unittest.TestCase): |
stdout_queue = Queue.Queue() |
download_from_google_storage._downloader_worker_thread( |
0, self.queue, False, self.base_url, self.gsutil, |
- stdout_queue, self.ret_codes, True) |
+ stdout_queue, self.ret_codes, True, False) |
expected_calls = [ |
('check_call', |
('ls', input_filename)), |
@@ -190,13 +249,53 @@ class DownloadTests(unittest.TestCase): |
stdout_queue = Queue.Queue() |
download_from_google_storage._downloader_worker_thread( |
0, self.queue, False, self.base_url, self.gsutil, |
- stdout_queue, self.ret_codes, True) |
+ stdout_queue, self.ret_codes, True, False) |
expected_output = [ |
'0> File %s exists and SHA1 matches. Skipping.' % output_filename |
] |
self.assertEqual(list(stdout_queue.queue), expected_output) |
self.assertEqual(self.gsutil.history, []) |
+ def test_download_extract_archive(self): |
+ # Runs _downloader_worker_thread on a .tar.gz target and checks the gsutil |
+ # calls, the queued output messages, the return codes, and that the |
+ # 'subfolder' directory and its text file exist afterwards. |
+ # The hash is deliberately one that does not match the local file. |
+ sha1_hash = '61223e1ad3d86901a57629fee38313db5ec106ff' |
+ input_filename = '%s/%s' % (self.base_url, sha1_hash) |
+ # Build the gzipped tarball from the 'subfolder' directory under |
+ # self.base_path. |
+ output_filename = os.path.join(self.base_path, 'subfolder.tar.gz') |
+ output_dirname = os.path.join(self.base_path, 'subfolder') |
+ extracted_filename = os.path.join(output_dirname, 'subfolder_text.txt') |
+ with tarfile.open(output_filename, 'w:gz') as tar: |
+ tar.add(output_dirname, arcname='subfolder') |
+ # Remove the source directory so the existence checks at the end can only |
+ # pass if the worker recreates it. |
+ # NOTE(review): this deletes a directory under self.base_path; if the worker |
+ # fails to extract, it is not restored by this method -- confirm that is |
+ # acceptable for the test data. |
+ shutil.rmtree(output_dirname) |
+ print(output_dirname) |
+ self.queue.put((sha1_hash, output_filename)) |
+ # Presumably (None, None) is the sentinel that tells the worker to stop -- |
+ # confirm against _downloader_worker_thread. |
+ self.queue.put((None, None)) |
+ stdout_queue = Queue.Queue() |
+ # The other worker calls in this file pass `True, False` as the last two |
+ # positional arguments; here the last one is True. Presumably that is the |
+ # extract flag -- confirm against the worker's signature. |
+ download_from_google_storage._downloader_worker_thread( |
+ 0, self.queue, False, self.base_url, self.gsutil, |
+ stdout_queue, self.ret_codes, True, True, delete=False) |
+ expected_calls = [ |
+ ('check_call', |
+ ('ls', input_filename)), |
+ ('check_call', |
+ ('cp', input_filename, output_filename))] |
+ # A 'stat' call is expected on every platform except win32. |
+ if sys.platform != 'win32': |
+ expected_calls.append( |
+ ('check_call', |
+ ('stat', |
+ 'gs://sometesturl/61223e1ad3d86901a57629fee38313db5ec106ff'))) |
+ expected_output = [ |
+ '0> Downloading %s...' % output_filename] |
+ expected_output.extend([ |
+ '0> Extracting 3 entries from %s to %s' % (output_filename, |
+ output_dirname)]) |
+ # No return codes are expected to be queued. |
+ expected_ret_codes = [] |
+ self.assertEqual(list(stdout_queue.queue), expected_output) |
+ self.assertEqual(self.gsutil.history, expected_calls) |
+ self.assertEqual(list(self.ret_codes.queue), expected_ret_codes) |
+ self.assertTrue(os.path.exists(output_dirname)) |
+ self.assertTrue(os.path.exists(extracted_filename)) |
+ |
def test_download_worker_skips_not_found_file(self): |
sha1_hash = '7871c8e24da15bad8b0be2c36edc9dc77e37727f' |
input_filename = '%s/%s' % (self.base_url, sha1_hash) |
@@ -207,7 +306,7 @@ class DownloadTests(unittest.TestCase): |
self.gsutil.add_expected(1, '', '') # Return error when 'ls' is called. |
download_from_google_storage._downloader_worker_thread( |
0, self.queue, False, self.base_url, self.gsutil, |
- stdout_queue, self.ret_codes, True) |
+ stdout_queue, self.ret_codes, True, False) |
expected_output = [ |
'0> Failed to fetch file %s for %s, skipping. [Err: ]' % ( |
input_filename, output_filename), |
@@ -242,7 +341,8 @@ class DownloadTests(unittest.TestCase): |
ignore_errors=False, |
sha1_file=False, |
verbose=True, |
- auto_platform=False) |
+ auto_platform=False, |
+ extract=False) |
expected_calls = [ |
('check_call', |
('ls', input_filename)), |
@@ -273,7 +373,8 @@ class DownloadTests(unittest.TestCase): |
ignore_errors=False, |
sha1_file=False, |
verbose=True, |
- auto_platform=False) |
+ auto_platform=False, |
+ extract=False) |
expected_calls = [ |
('check_call', |
('ls', input_filename)), |