# -*- coding: utf-8 -*-
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit tests for hashing helper functions and classes."""

from __future__ import absolute_import

from hashlib import md5
import os
import pkgutil

from gslib.exception import CommandException
from gslib.hashing_helper import CalculateMd5FromContents
from gslib.hashing_helper import HashingFileUploadWrapper
import gslib.tests.testcase as testcase
from gslib.util import StorageUrlFromString
from gslib.util import TRANSFER_BUFFER_SIZE


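# Name of the test data file bundled with gslib under tests/test_data/.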
_TEST_FILE = 'test.txt'


class TestHashingFileUploadWrapper(testcase.GsUtilUnitTestCase):
  """Unit tests for the HashingFileUploadWrapper class."""

  _temp_test_file = None
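  # Placeholder source URL for constructing the wrapper; these unit tests
  # never issue any requests against this bucket.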
  _dummy_url = StorageUrlFromString('gs://bucket/object')

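  # Lazily creates a temp copy of the bundled test data file; repeated calls
  # within a single test reuse the same copy.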
  def _GetTestFile(self):
    if not self._temp_test_file:
      contents = pkgutil.get_data('gslib', 'tests/test_data/%s' % _TEST_FILE)
      self._temp_test_file = self.CreateTempFile(
          file_name=_TEST_FILE, contents=contents)
    return self._temp_test_file

  def testReadToEOF(self):
    digesters = {'md5': md5()}
    tmp_file = self.CreateTempFile(contents='a' * TRANSFER_BUFFER_SIZE * 4)
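    # Reading through the wrapper should feed every byte into the md5
    # digester, so its digest must match an independently computed hash of
    # the same file.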
    with open(tmp_file, 'rb') as stream:
      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
                                         self._dummy_url, self.logger)
      wrapper.read()
    with open(tmp_file, 'rb') as stream:
      actual = CalculateMd5FromContents(stream)
    self.assertEqual(actual, digesters['md5'].hexdigest())

  def _testSeekBack(self, initial_position, seek_back_amount):
    """Tests reading then seeking backwards.

    This function simulates an upload that is resumed after a connection break.
    It reads one transfer buffer at a time until it reaches initial_position,
    then seeks backwards (as if the server did not receive some of the bytes)
    and reads to the end of the file, ensuring the hash matches the original
    file upon completion.

    Args:
      initial_position: Initial number of bytes to read before seek.
      seek_back_amount: Number of bytes to seek backward.

    Raises:
      AssertionError on wrong amount of data remaining or hash mismatch.
    """
    tmp_file = self._GetTestFile()
    tmp_file_len = os.path.getsize(tmp_file)

    self.assertGreaterEqual(
        initial_position, seek_back_amount,
        'seek_back_amount must be less than or equal to initial_position %s '
        '(but was actually: %s)' % (initial_position, seek_back_amount))
    self.assertLess(
        initial_position, tmp_file_len,
        'initial_position must be less than test file size %s '
        '(but was actually: %s)' % (tmp_file_len, initial_position))

    digesters = {'md5': md5()}
    with open(tmp_file, 'rb') as stream:
      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
                                         self._dummy_url, self.logger)
      position = 0
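      # Read up to initial_position one transfer buffer at a time, mimicking
      # how an upload consumes the source file.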
      while position < initial_position - TRANSFER_BUFFER_SIZE:
        data = wrapper.read(TRANSFER_BUFFER_SIZE)
        position += len(data)
      wrapper.read(initial_position - position)
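      # Seek backwards as if the server did not receive the last bytes; the
      # wrapper is expected to rewind its digest state so the final hash
      # still covers the entire file.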
      wrapper.seek(initial_position - seek_back_amount)
      self.assertEqual(wrapper.tell(),
                       initial_position - seek_back_amount)
      data = wrapper.read()
      self.assertEqual(
          len(data), tmp_file_len - (initial_position - seek_back_amount))
    with open(tmp_file, 'rb') as stream:
      actual = CalculateMd5FromContents(stream)
    self.assertEqual(actual, digesters['md5'].hexdigest())

  def testSeekToBeginning(self):
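    # Exercise byte counts just below, at, and just above multiples of the
    # transfer buffer size to cover the buffer-boundary edge cases.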
    for num_bytes in (TRANSFER_BUFFER_SIZE - 1,
                      TRANSFER_BUFFER_SIZE,
                      TRANSFER_BUFFER_SIZE + 1,
                      TRANSFER_BUFFER_SIZE * 2 - 1,
                      TRANSFER_BUFFER_SIZE * 2,
                      TRANSFER_BUFFER_SIZE * 2 + 1,
                      TRANSFER_BUFFER_SIZE * 3 - 1,
                      TRANSFER_BUFFER_SIZE * 3,
                      TRANSFER_BUFFER_SIZE * 3 + 1):
      self._testSeekBack(num_bytes, num_bytes)

  def testSeekBackAroundOneBuffer(self):
    for initial_position in (TRANSFER_BUFFER_SIZE + 1,
                             TRANSFER_BUFFER_SIZE * 2 - 1,
                             TRANSFER_BUFFER_SIZE * 2,
                             TRANSFER_BUFFER_SIZE * 2 + 1,
                             TRANSFER_BUFFER_SIZE * 3 - 1,
                             TRANSFER_BUFFER_SIZE * 3,
                             TRANSFER_BUFFER_SIZE * 3 + 1):
      for seek_back_amount in (TRANSFER_BUFFER_SIZE - 1,
                               TRANSFER_BUFFER_SIZE,
                               TRANSFER_BUFFER_SIZE + 1):
        self._testSeekBack(initial_position, seek_back_amount)

  def testSeekBackMoreThanOneBuffer(self):
    for initial_position in (TRANSFER_BUFFER_SIZE * 2 + 1,
                             TRANSFER_BUFFER_SIZE * 3 - 1,
                             TRANSFER_BUFFER_SIZE * 3,
                             TRANSFER_BUFFER_SIZE * 3 + 1):
      for seek_back_amount in (TRANSFER_BUFFER_SIZE * 2 - 1,
                               TRANSFER_BUFFER_SIZE * 2,
                               TRANSFER_BUFFER_SIZE * 2 + 1):
        self._testSeekBack(initial_position, seek_back_amount)

  def _testSeekForward(self, initial_seek):
    """Tests seeking to an initial position and then reading.

    This function simulates an upload that is resumed after a process break.
    It seeks from zero to the initial position (as if the server already had
    those bytes). Then it reads to the end of the file, ensuring the hash
    matches the original file upon completion.

    Args:
      initial_seek: Number of bytes to initially seek.

    Raises:
      AssertionError on wrong amount of data remaining or hash mismatch.
    """
    tmp_file = self._GetTestFile()
    tmp_file_len = os.path.getsize(tmp_file)

    self.assertLess(
        initial_seek, tmp_file_len,
        'initial_seek must be less than test file size %s '
        '(but was actually: %s)' % (tmp_file_len, initial_seek))

    digesters = {'md5': md5()}
    with open(tmp_file, 'rb') as stream:
      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
                                         self._dummy_url, self.logger)
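      # Seek past bytes that were never read; the wrapper is expected to hash
      # the skipped range itself so the final digest covers the whole file.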
      wrapper.seek(initial_seek)
      self.assertEqual(wrapper.tell(), initial_seek)
      data = wrapper.read()
    self.assertEqual(len(data), tmp_file_len - initial_seek)
    with open(tmp_file, 'rb') as stream:
      actual = CalculateMd5FromContents(stream)
    self.assertEqual(actual, digesters['md5'].hexdigest())

  def testSeekForward(self):
    for initial_seek in (0,
                         TRANSFER_BUFFER_SIZE - 1,
                         TRANSFER_BUFFER_SIZE,
                         TRANSFER_BUFFER_SIZE + 1,
                         TRANSFER_BUFFER_SIZE * 2 - 1,
                         TRANSFER_BUFFER_SIZE * 2,
                         TRANSFER_BUFFER_SIZE * 2 + 1):
      self._testSeekForward(initial_seek)

  def _testSeekAway(self, initial_read):
    """Tests reading to an initial position and then seeking to EOF and back.

    This function simulates a size check on the input file by seeking to the
    end of the file and then back to the current position. Then it reads to
    the end of the file, ensuring the hash matches the original file upon
    completion.

    Args:
      initial_read: Number of bytes to initially read.

    Raises:
      AssertionError on wrong amount of data remaining or hash mismatch.
    """
    tmp_file = self._GetTestFile()
    tmp_file_len = os.path.getsize(tmp_file)

    self.assertLess(
        initial_read, tmp_file_len,
        'initial_read must be less than test file size %s '
        '(but was actually: %s)' % (tmp_file_len, initial_read))

    digesters = {'md5': md5()}
    with open(tmp_file, 'rb') as stream:
      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
                                         self._dummy_url, self.logger)
      wrapper.read(initial_read)
      self.assertEqual(wrapper.tell(), initial_read)
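      # Simulate a size check: seek to EOF and then back to the saved
      # position. This detour should not disturb the running digest.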
      wrapper.seek(0, os.SEEK_END)
      self.assertEqual(wrapper.tell(), tmp_file_len)
      wrapper.seek(initial_read, os.SEEK_SET)
      data = wrapper.read()
    self.assertEqual(len(data), tmp_file_len - initial_read)
    with open(tmp_file, 'rb') as stream:
      actual = CalculateMd5FromContents(stream)
    self.assertEqual(actual, digesters['md5'].hexdigest())

  def testValidSeekAway(self):
    for initial_read in (0,
                         TRANSFER_BUFFER_SIZE - 1,
                         TRANSFER_BUFFER_SIZE,
                         TRANSFER_BUFFER_SIZE + 1,
                         TRANSFER_BUFFER_SIZE * 2 - 1,
                         TRANSFER_BUFFER_SIZE * 2,
                         TRANSFER_BUFFER_SIZE * 2 + 1):
      self._testSeekAway(initial_read)

  def testInvalidSeekAway(self):
    """Tests seeking to EOF and then reading without first doing a SEEK_SET."""
    tmp_file = self._GetTestFile()
    digesters = {'md5': md5()}
    with open(tmp_file, 'rb') as stream:
      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
                                         self._dummy_url, self.logger)
      wrapper.read(TRANSFER_BUFFER_SIZE)
      wrapper.seek(0, os.SEEK_END)
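      # Reading after seeking to EOF without first restoring the position
      # leaves the digest state unknown, so the wrapper should refuse with a
      # CommandException instead of producing a bogus hash.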
      try:
        wrapper.read()
        self.fail('Expected CommandException for invalid seek.')
      except CommandException as e:
        self.assertIn(
            'Read called on hashing file pointer in an unknown position',
            str(e))