# -*- coding: utf-8 -*-
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit tests for hashing helper functions and classes."""

from __future__ import absolute_import

from hashlib import md5
import os
import pkgutil

from gslib.exception import CommandException
from gslib.hashing_helper import CalculateMd5FromContents
from gslib.hashing_helper import HashingFileUploadWrapper
import gslib.tests.testcase as testcase
from gslib.util import StorageUrlFromString
from gslib.util import TRANSFER_BUFFER_SIZE


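# Name of a data file shipped with the gslib test package. The seek tests
# below assume it is at least several TRANSFER_BUFFER_SIZEs long; each helper
# asserts that the offsets it uses stay inside the file.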
_TEST_FILE = 'test.txt'


class TestHashingFileUploadWrapper(testcase.GsUtilUnitTestCase):
  """Unit tests for the HashingFileUploadWrapper class."""

  _temp_test_file = None
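  # Placeholder URL handed to HashingFileUploadWrapper; the assumption here is
  # that the wrapper only uses it to identify the source in log and error
  # messages, so no real bucket or object is required.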
  _dummy_url = StorageUrlFromString('gs://bucket/object')

  def _GetTestFile(self):
    if not self._temp_test_file:
      contents = pkgutil.get_data('gslib', 'tests/test_data/%s' % _TEST_FILE)
      self._temp_test_file = self.CreateTempFile(
          file_name=_TEST_FILE, contents=contents)
    return self._temp_test_file

  def testReadToEOF(self):
    digesters = {'md5': md5()}
    tmp_file = self.CreateTempFile(contents='a' * TRANSFER_BUFFER_SIZE * 4)
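    # Reading the whole stream through the wrapper should feed every byte to
    # the digesters, so the md5 collected here must match an md5 computed
    # directly from the file below.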
    with open(tmp_file, 'rb') as stream:
      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
                                         self._dummy_url, self.logger)
      wrapper.read()
    with open(tmp_file, 'rb') as stream:
      actual = CalculateMd5FromContents(stream)
    self.assertEqual(actual, digesters['md5'].hexdigest())

  def _testSeekBack(self, initial_position, seek_back_amount):
    """Tests reading then seeking backwards.

    This function simulates an upload that is resumed after a connection break.
    It reads one transfer buffer at a time until it reaches initial_position,
    then seeks backwards (as if the server did not receive some of the bytes)
    and reads to the end of the file, ensuring the hash matches the original
    file upon completion.

    Args:
      initial_position: Initial number of bytes to read before seek.
      seek_back_amount: Number of bytes to seek backward.

    Raises:
      AssertionError on wrong amount of data remaining or hash mismatch.
    """
    tmp_file = self._GetTestFile()
    tmp_file_len = os.path.getsize(tmp_file)

    self.assertGreaterEqual(
        initial_position, seek_back_amount,
        'seek_back_amount must not be greater than initial position %s '
        '(but was actually: %s)' % (initial_position, seek_back_amount))
    self.assertLess(
        initial_position, tmp_file_len,
        'initial_position must be less than test file size %s '
        '(but was actually: %s)' % (tmp_file_len, initial_position))

    digesters = {'md5': md5()}
    with open(tmp_file, 'rb') as stream:
      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
                                         self._dummy_url, self.logger)
      position = 0
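      # Read up to initial_position one transfer buffer at a time, the way an
      # upload would consume the stream.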
      while position < initial_position - TRANSFER_BUFFER_SIZE:
        data = wrapper.read(TRANSFER_BUFFER_SIZE)
        position += len(data)
      wrapper.read(initial_position - position)
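      # Seek back as if the server never received the last seek_back_amount
      # bytes. The bytes re-read after this rewind must not be double-counted
      # in the digest, which the final hash comparison verifies.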
      wrapper.seek(initial_position - seek_back_amount)
      self.assertEqual(wrapper.tell(),
                       initial_position - seek_back_amount)
      data = wrapper.read()
      self.assertEqual(
          len(data), tmp_file_len - (initial_position - seek_back_amount))
    with open(tmp_file, 'rb') as stream:
      actual = CalculateMd5FromContents(stream)
    self.assertEqual(actual, digesters['md5'].hexdigest())

  def testSeekToBeginning(self):
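    # Probe offsets just below, at, and just above multiples of
    # TRANSFER_BUFFER_SIZE so seeks land on and around buffer boundaries.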
    for num_bytes in (TRANSFER_BUFFER_SIZE - 1,
                      TRANSFER_BUFFER_SIZE,
                      TRANSFER_BUFFER_SIZE + 1,
                      TRANSFER_BUFFER_SIZE * 2 - 1,
                      TRANSFER_BUFFER_SIZE * 2,
                      TRANSFER_BUFFER_SIZE * 2 + 1,
                      TRANSFER_BUFFER_SIZE * 3 - 1,
                      TRANSFER_BUFFER_SIZE * 3,
                      TRANSFER_BUFFER_SIZE * 3 + 1):
      self._testSeekBack(num_bytes, num_bytes)

  def testSeekBackAroundOneBuffer(self):
    for initial_position in (TRANSFER_BUFFER_SIZE + 1,
                             TRANSFER_BUFFER_SIZE * 2 - 1,
                             TRANSFER_BUFFER_SIZE * 2,
                             TRANSFER_BUFFER_SIZE * 2 + 1,
                             TRANSFER_BUFFER_SIZE * 3 - 1,
                             TRANSFER_BUFFER_SIZE * 3,
                             TRANSFER_BUFFER_SIZE * 3 + 1):
      for seek_back_amount in (TRANSFER_BUFFER_SIZE - 1,
                               TRANSFER_BUFFER_SIZE,
                               TRANSFER_BUFFER_SIZE + 1):
        self._testSeekBack(initial_position, seek_back_amount)

  def testSeekBackMoreThanOneBuffer(self):
    for initial_position in (TRANSFER_BUFFER_SIZE * 2 + 1,
                             TRANSFER_BUFFER_SIZE * 3 - 1,
                             TRANSFER_BUFFER_SIZE * 3,
                             TRANSFER_BUFFER_SIZE * 3 + 1):
      for seek_back_amount in (TRANSFER_BUFFER_SIZE * 2 - 1,
                               TRANSFER_BUFFER_SIZE * 2,
                               TRANSFER_BUFFER_SIZE * 2 + 1):
        self._testSeekBack(initial_position, seek_back_amount)

  def _testSeekForward(self, initial_seek):
    """Tests seeking to an initial position and then reading.

    This function simulates an upload that is resumed after a process break.
    It seeks from zero to the initial position (as if the server already had
    those bytes). Then it reads to the end of the file, ensuring the hash
    matches the original file upon completion.

    Args:
      initial_seek: Number of bytes to initially seek.

    Raises:
      AssertionError on wrong amount of data remaining or hash mismatch.
    """
    tmp_file = self._GetTestFile()
    tmp_file_len = os.path.getsize(tmp_file)

    self.assertLess(
        initial_seek, tmp_file_len,
        'initial_seek must be less than test file size %s '
        '(but was actually: %s)' % (tmp_file_len, initial_seek))

    digesters = {'md5': md5()}
    with open(tmp_file, 'rb') as stream:
      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
                                         self._dummy_url, self.logger)
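      # Skip the first initial_seek bytes, as a resumed upload would when the
      # server already has them; the final digest must still cover the whole
      # file even though those bytes are never returned by read().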
      wrapper.seek(initial_seek)
      self.assertEqual(wrapper.tell(), initial_seek)
      data = wrapper.read()
      self.assertEqual(len(data), tmp_file_len - initial_seek)
    with open(tmp_file, 'rb') as stream:
      actual = CalculateMd5FromContents(stream)
    self.assertEqual(actual, digesters['md5'].hexdigest())

  def testSeekForward(self):
    for initial_seek in (0,
                         TRANSFER_BUFFER_SIZE - 1,
                         TRANSFER_BUFFER_SIZE,
                         TRANSFER_BUFFER_SIZE + 1,
                         TRANSFER_BUFFER_SIZE * 2 - 1,
                         TRANSFER_BUFFER_SIZE * 2,
                         TRANSFER_BUFFER_SIZE * 2 + 1):
      self._testSeekForward(initial_seek)

  def _testSeekAway(self, initial_read):
    """Tests reading to an initial position and then seeking to EOF and back.

    This function simulates a size check on the input file by seeking to the
    end of the file and then back to the current position. Then it reads to
    the end of the file, ensuring the hash matches the original file upon
    completion.

    Args:
      initial_read: Number of bytes to initially read.

    Raises:
      AssertionError on wrong amount of data remaining or hash mismatch.
    """
    tmp_file = self._GetTestFile()
    tmp_file_len = os.path.getsize(tmp_file)

    self.assertLess(
        initial_read, tmp_file_len,
        'initial_read must be less than test file size %s '
        '(but was actually: %s)' % (tmp_file_len, initial_read))

    digesters = {'md5': md5()}
    with open(tmp_file, 'rb') as stream:
      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
                                         self._dummy_url, self.logger)
      wrapper.read(initial_read)
      self.assertEqual(wrapper.tell(), initial_read)
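      # Simulate a size check: seek to the end of the file, confirm the
      # reported size, then return to where the upload left off.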
      wrapper.seek(0, os.SEEK_END)
      self.assertEqual(wrapper.tell(), tmp_file_len)
      wrapper.seek(initial_read, os.SEEK_SET)
      data = wrapper.read()
      self.assertEqual(len(data), tmp_file_len - initial_read)
    with open(tmp_file, 'rb') as stream:
      actual = CalculateMd5FromContents(stream)
    self.assertEqual(actual, digesters['md5'].hexdigest())

  def testValidSeekAway(self):
    for initial_read in (0,
                         TRANSFER_BUFFER_SIZE - 1,
                         TRANSFER_BUFFER_SIZE,
                         TRANSFER_BUFFER_SIZE + 1,
                         TRANSFER_BUFFER_SIZE * 2 - 1,
                         TRANSFER_BUFFER_SIZE * 2,
                         TRANSFER_BUFFER_SIZE * 2 + 1):
      self._testSeekAway(initial_read)

  def testInvalidSeekAway(self):
    """Tests seeking to EOF and then reading without first doing a SEEK_SET."""
    tmp_file = self._GetTestFile()
    digesters = {'md5': md5()}
    with open(tmp_file, 'rb') as stream:
      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
                                         self._dummy_url, self.logger)
      wrapper.read(TRANSFER_BUFFER_SIZE)
      wrapper.seek(0, os.SEEK_END)
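      # The wrapper is left at EOF without an explicit SEEK_SET back, so its
      # position is unknown to the hashing logic and the next read should
      # raise a CommandException.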
      try:
        wrapper.read()
        self.fail('Expected CommandException for invalid seek.')
      except CommandException as e:
        self.assertIn(
            'Read called on hashing file pointer in an unknown position',
            str(e))