| Index: gslib/tests/test_hashing_helper.py
|
| ===================================================================
|
| --- gslib/tests/test_hashing_helper.py (revision 0)
|
| +++ gslib/tests/test_hashing_helper.py (revision 0)
|
| @@ -0,0 +1,246 @@
|
| +# -*- coding: utf-8 -*-
|
| +# Copyright 2014 Google Inc. All Rights Reserved.
|
| +#
|
| +# Licensed under the Apache License, Version 2.0 (the "License");
|
| +# you may not use this file except in compliance with the License.
|
| +# You may obtain a copy of the License at
|
| +#
|
| +# http://www.apache.org/licenses/LICENSE-2.0
|
| +#
|
| +# Unless required by applicable law or agreed to in writing, software
|
| +# distributed under the License is distributed on an "AS IS" BASIS,
|
| +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| +# See the License for the specific language governing permissions and
|
| +# limitations under the License.
|
| +"""Unit tests for hashing helper functions and classes."""
|
| +
|
| +from __future__ import absolute_import
|
| +
|
| +from hashlib import md5
|
| +import os
|
| +import pkgutil
|
| +
|
| +from gslib.exception import CommandException
|
| +from gslib.hashing_helper import CalculateMd5FromContents
|
| +from gslib.hashing_helper import HashingFileUploadWrapper
|
| +import gslib.tests.testcase as testcase
|
| +from gslib.util import StorageUrlFromString
|
| +from gslib.util import TRANSFER_BUFFER_SIZE
|
| +
|
| +
|
| +# Name of the data file under gslib/tests/test_data that the tests copy.
|
| +_TEST_FILE = 'test.txt'
|
| +
|
| +
|
| +class TestHashingFileUploadWrapper(testcase.GsUtilUnitTestCase):
|
| +  """Unit tests for the HashingFileUploadWrapper class.
|
| +
|
| +  Apart from testInvalidSeekAway, which expects a CommandException, each test
|
| +  wraps a local file stream in a HashingFileUploadWrapper, drives it with
|
| +  read/seek/tell calls, and then compares the MD5 digester handed to the
|
| +  wrapper against CalculateMd5FromContents run over the same file.
|
| +  """
|
| +
|
| +  # Path of the temporary copy of the test data file; set by _GetTestFile.
|
| +  _temp_test_file = None
|
| +  # Storage URL handed to every HashingFileUploadWrapper built by the tests.
|
| +  _dummy_url = StorageUrlFromString('gs://bucket/object')
|
| +
|
| +  def _GetTestFile(self):
|
| +    """Returns the path of a temporary copy of the packaged test data file.
|
| +
|
| +    The copy is created on the first call and cached on the instance; the
|
| +    package data is only read when the copy does not exist yet.
|
| +
|
| +    Returns:
|
| +      The value returned by CreateTempFile for the copy.
|
| +    """
|
| +    if not self._temp_test_file:
|
| +      contents = pkgutil.get_data('gslib', 'tests/test_data/%s' % _TEST_FILE)
|
| +      self._temp_test_file = self.CreateTempFile(
|
| +          file_name=_TEST_FILE, contents=contents)
|
| +    return self._temp_test_file
|
| +
|
| +  def testReadToEOF(self):
|
| +    """Tests that a single read() to EOF hashes the entire file."""
|
| +    digesters = {'md5': md5()}
|
| +    tmp_file = self.CreateTempFile(contents='a' * TRANSFER_BUFFER_SIZE * 4)
|
| +    with open(tmp_file, 'rb') as stream:
|
| +      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
|
| +                                         self._dummy_url, self.logger)
|
| +      wrapper.read()
|
| +    with open(tmp_file, 'rb') as stream:
|
| +      actual = CalculateMd5FromContents(stream)
|
| +    self.assertEqual(actual, digesters['md5'].hexdigest())
|
| +
|
| +  def _testSeekBack(self, initial_position, seek_back_amount):
|
| +    """Tests reading then seeking backwards.
|
| +
|
| +    This function simulates an upload that is resumed after a connection break.
|
| +    It reads one transfer buffer at a time until it reaches initial_position,
|
| +    then seeks backwards (as if the server did not receive some of the bytes)
|
| +    and reads to the end of the file, ensuring the hash matches the original
|
| +    file upon completion.
|
| +
|
| +    Args:
|
| +      initial_position: Initial number of bytes to read before seek. Must be
|
| +          less than the size of the test file.
|
| +      seek_back_amount: Number of bytes to seek backward. Must not exceed
|
| +          initial_position.
|
| +
|
| +    Raises:
|
| +      AssertionError on wrong amount of data remaining or hash mismatch.
|
| +    """
|
| +    tmp_file = self._GetTestFile()
|
| +    tmp_file_len = os.path.getsize(tmp_file)
|
| +
|
| +    # Validate the test parameters themselves before exercising the wrapper.
|
| +    self.assertGreaterEqual(
|
| +        initial_position, seek_back_amount,
|
| +        'seek_back_amount must be less than or equal to initial position '
|
| +        '%s (but was actually: %s)' % (initial_position, seek_back_amount))
|
| +    self.assertLess(
|
| +        initial_position, tmp_file_len,
|
| +        'initial_position must be less than test file size %s '
|
| +        '(but was actually: %s)' % (tmp_file_len, initial_position))
|
| +
|
| +    digesters = {'md5': md5()}
|
| +    with open(tmp_file, 'rb') as stream:
|
| +      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
|
| +                                         self._dummy_url, self.logger)
|
| +      # Read whole buffers while more than one buffer remains before
|
| +      # initial_position, then read the remainder up to initial_position.
|
| +      position = 0
|
| +      while position < initial_position - TRANSFER_BUFFER_SIZE:
|
| +        data = wrapper.read(TRANSFER_BUFFER_SIZE)
|
| +        position += len(data)
|
| +      wrapper.read(initial_position - position)
|
| +      wrapper.seek(initial_position - seek_back_amount)
|
| +      self.assertEqual(wrapper.tell(),
|
| +                       initial_position - seek_back_amount)
|
| +      data = wrapper.read()
|
| +      self.assertEqual(
|
| +          len(data), tmp_file_len - (initial_position - seek_back_amount))
|
| +    with open(tmp_file, 'rb') as stream:
|
| +      actual = CalculateMd5FromContents(stream)
|
| +    self.assertEqual(actual, digesters['md5'].hexdigest())
|
| +
|
| +  def testSeekToBeginning(self):
|
| +    """Tests seeking back to offset zero after reads of various lengths."""
|
| +    for num_bytes in (TRANSFER_BUFFER_SIZE - 1,
|
| +                      TRANSFER_BUFFER_SIZE,
|
| +                      TRANSFER_BUFFER_SIZE + 1,
|
| +                      TRANSFER_BUFFER_SIZE * 2 - 1,
|
| +                      TRANSFER_BUFFER_SIZE * 2,
|
| +                      TRANSFER_BUFFER_SIZE * 2 + 1,
|
| +                      TRANSFER_BUFFER_SIZE * 3 - 1,
|
| +                      TRANSFER_BUFFER_SIZE * 3,
|
| +                      TRANSFER_BUFFER_SIZE * 3 + 1):
|
| +      self._testSeekBack(num_bytes, num_bytes)
|
| +
|
| +  def testSeekBackAroundOneBuffer(self):
|
| +    """Tests seeking back by about one transfer buffer from various offsets."""
|
| +    for initial_position in (TRANSFER_BUFFER_SIZE + 1,
|
| +                             TRANSFER_BUFFER_SIZE * 2 - 1,
|
| +                             TRANSFER_BUFFER_SIZE * 2,
|
| +                             TRANSFER_BUFFER_SIZE * 2 + 1,
|
| +                             TRANSFER_BUFFER_SIZE * 3 - 1,
|
| +                             TRANSFER_BUFFER_SIZE * 3,
|
| +                             TRANSFER_BUFFER_SIZE * 3 + 1):
|
| +      for seek_back_amount in (TRANSFER_BUFFER_SIZE - 1,
|
| +                               TRANSFER_BUFFER_SIZE,
|
| +                               TRANSFER_BUFFER_SIZE + 1):
|
| +        self._testSeekBack(initial_position, seek_back_amount)
|
| +
|
| +  def testSeekBackMoreThanOneBuffer(self):
|
| +    """Tests seeking back by about two transfer buffers from various offsets."""
|
| +    for initial_position in (TRANSFER_BUFFER_SIZE * 2 + 1,
|
| +                             TRANSFER_BUFFER_SIZE * 3 - 1,
|
| +                             TRANSFER_BUFFER_SIZE * 3,
|
| +                             TRANSFER_BUFFER_SIZE * 3 + 1):
|
| +      for seek_back_amount in (TRANSFER_BUFFER_SIZE * 2 - 1,
|
| +                               TRANSFER_BUFFER_SIZE * 2,
|
| +                               TRANSFER_BUFFER_SIZE * 2 + 1):
|
| +        self._testSeekBack(initial_position, seek_back_amount)
|
| +
|
| +  def _testSeekForward(self, initial_seek):
|
| +    """Tests seeking to an initial position and then reading.
|
| +
|
| +    This function simulates an upload that is resumed after a process break.
|
| +    It seeks from zero to the initial position (as if the server already had
|
| +    those bytes). Then it reads to the end of the file, ensuring the hash
|
| +    matches the original file upon completion.
|
| +
|
| +    Args:
|
| +      initial_seek: Number of bytes to initially seek. Must be less than the
|
| +          size of the test file.
|
| +
|
| +    Raises:
|
| +      AssertionError on wrong amount of data remaining or hash mismatch.
|
| +    """
|
| +    tmp_file = self._GetTestFile()
|
| +    tmp_file_len = os.path.getsize(tmp_file)
|
| +
|
| +    self.assertLess(
|
| +        initial_seek, tmp_file_len,
|
| +        'initial_seek must be less than test file size %s '
|
| +        '(but was actually: %s)' % (tmp_file_len, initial_seek))
|
| +
|
| +    digesters = {'md5': md5()}
|
| +    with open(tmp_file, 'rb') as stream:
|
| +      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
|
| +                                         self._dummy_url, self.logger)
|
| +      wrapper.seek(initial_seek)
|
| +      self.assertEqual(wrapper.tell(), initial_seek)
|
| +      data = wrapper.read()
|
| +      self.assertEqual(len(data), tmp_file_len - initial_seek)
|
| +    with open(tmp_file, 'rb') as stream:
|
| +      actual = CalculateMd5FromContents(stream)
|
| +    self.assertEqual(actual, digesters['md5'].hexdigest())
|
| +
|
| +  def testSeekForward(self):
|
| +    """Tests an initial forward seek to offsets around buffer boundaries."""
|
| +    for initial_seek in (0,
|
| +                         TRANSFER_BUFFER_SIZE - 1,
|
| +                         TRANSFER_BUFFER_SIZE,
|
| +                         TRANSFER_BUFFER_SIZE + 1,
|
| +                         TRANSFER_BUFFER_SIZE * 2 - 1,
|
| +                         TRANSFER_BUFFER_SIZE * 2,
|
| +                         TRANSFER_BUFFER_SIZE * 2 + 1):
|
| +      self._testSeekForward(initial_seek)
|
| +
|
| +  def _testSeekAway(self, initial_read):
|
| +    """Tests reading to an initial position and then seeking to EOF and back.
|
| +
|
| +    This function simulates a size check on the input file by seeking to the
|
| +    end of the file and then back to the current position. Then it reads to
|
| +    the end of the file, ensuring the hash matches the original file upon
|
| +    completion.
|
| +
|
| +    Args:
|
| +      initial_read: Number of bytes to initially read. Must be less than the
|
| +          size of the test file.
|
| +
|
| +    Raises:
|
| +      AssertionError on wrong amount of data remaining or hash mismatch.
|
| +    """
|
| +    tmp_file = self._GetTestFile()
|
| +    tmp_file_len = os.path.getsize(tmp_file)
|
| +
|
| +    self.assertLess(
|
| +        initial_read, tmp_file_len,
|
| +        'initial_read must be less than test file size %s '
|
| +        '(but was actually: %s)' % (tmp_file_len, initial_read))
|
| +
|
| +    digesters = {'md5': md5()}
|
| +    with open(tmp_file, 'rb') as stream:
|
| +      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
|
| +                                         self._dummy_url, self.logger)
|
| +      wrapper.read(initial_read)
|
| +      self.assertEqual(wrapper.tell(), initial_read)
|
| +      # Jump to EOF and back again before resuming the read.
|
| +      wrapper.seek(0, os.SEEK_END)
|
| +      self.assertEqual(wrapper.tell(), tmp_file_len)
|
| +      wrapper.seek(initial_read, os.SEEK_SET)
|
| +      data = wrapper.read()
|
| +      self.assertEqual(len(data), tmp_file_len - initial_read)
|
| +    with open(tmp_file, 'rb') as stream:
|
| +      actual = CalculateMd5FromContents(stream)
|
| +    self.assertEqual(actual, digesters['md5'].hexdigest())
|
| +
|
| +  def testValidSeekAway(self):
|
| +    """Tests seeking to EOF and back after reads of various lengths."""
|
| +    for initial_read in (0,
|
| +                         TRANSFER_BUFFER_SIZE - 1,
|
| +                         TRANSFER_BUFFER_SIZE,
|
| +                         TRANSFER_BUFFER_SIZE + 1,
|
| +                         TRANSFER_BUFFER_SIZE * 2 - 1,
|
| +                         TRANSFER_BUFFER_SIZE * 2,
|
| +                         TRANSFER_BUFFER_SIZE * 2 + 1):
|
| +      self._testSeekAway(initial_read)
|
| +
|
| +  def testInvalidSeekAway(self):
|
| +    """Tests seeking to EOF and then reading without first doing a SEEK_SET."""
|
| +    tmp_file = self._GetTestFile()
|
| +    digesters = {'md5': md5()}
|
| +    with open(tmp_file, 'rb') as stream:
|
| +      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
|
| +                                         self._dummy_url, self.logger)
|
| +      wrapper.read(TRANSFER_BUFFER_SIZE)
|
| +      wrapper.seek(0, os.SEEK_END)
|
| +      try:
|
| +        wrapper.read()
|
| +        self.fail('Expected CommandException for invalid seek.')
|
| +      # "except X as e" is valid on Python 2.6+ and Python 3; the older
|
| +      # "except X, e" spelling is a syntax error on Python 3.
|
| +      except CommandException as e:
|
| +        self.assertIn(
|
| +            'Read called on hashing file pointer in an unknown position',
|
| +            str(e))
|
|
|
| Property changes on: gslib/tests/test_hashing_helper.py
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|