# -*- coding: utf-8 -*-
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Integration tests for rsync command."""

import os

import crcmod

import gslib.tests.testcase as testcase
from gslib.tests.testcase.integration_testcase import SkipForS3
from gslib.tests.util import ObjectToURI as suri
from gslib.tests.util import PerformsFileToObjectUpload
from gslib.tests.util import SetBotoConfigForTest
from gslib.tests.util import unittest
from gslib.util import IS_WINDOWS
from gslib.util import Retry
from gslib.util import UsingCrcmodExtension

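# Stderr output rsync prints when it finds nothing to copy or delete; tests
# below compare a re-run's stderr against this constant to assert that the
# second run was a no-op.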
NO_CHANGES = 'Building synchronization state...\nStarting synchronization\n'


def _TailSet(start_point, listing):
  """Returns the set of object name tails.

  Tails can be compared between source and dest, past the point at which the
  rsync was done. For example, if the test ran
  rsync gs://bucket1/dir gs://bucket2/dir2, the tails for listings from
  bucket1 would start after "dir", while the tails for listings from bucket2
  would start after "dir2".

  Args:
    start_point: The target of the rsync command, e.g., for the above command
      it would be gs://bucket1/dir for the bucket1 listing results and
      gs://bucket2/dir2 for the bucket2 listing results.
    listing: The listing over which to compute tails.

  Returns:
    Set of object name tails.
  """
  return set(l[len(start_point):] for l in listing.strip().split('\n'))
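# A minimal example of the tail computation above (hypothetical values):
# _TailSet('gs://bkt/dir', 'gs://bkt/dir/obj1\ngs://bkt/dir/sub/obj2')
# returns set(['/obj1', '/sub/obj2']).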

# TODO: Add inspection to the retry wrappers in this test suite where the state
# at the end of a retry block is depended upon by subsequent tests (since
# listing content can vary depending on which backend server is reached until
# eventual consistency is reached).
# TODO: Remove retry wrappers and AssertNObjectsInBucket calls if GCS ever
# supports strong listing consistency.
class TestRsync(testcase.GsUtilIntegrationTestCase):
  """Integration tests for rsync command."""

  @staticmethod
  def _FlatListDir(directory):
    """Perform a flat listing over directory.

    Args:
      directory: The directory to list.

    Returns:
      Listings with path separators canonicalized to '/', to make assertions
      easier for Linux vs Windows.
    """
    result = []
    for dirpath, _, filenames in os.walk(directory):
      for f in filenames:
        result.append(os.path.join(dirpath, f))
    return '\n'.join(result).replace('\\', '/')
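  # A minimal sketch (hypothetical layout): for a tree containing a/obj1 and
  # a/sub/obj2, _FlatListDir('a') returns 'a/obj1\na/sub/obj2', with any
  # Windows '\\' separators rewritten to '/'.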

  def _FlatListBucket(self, bucket_url_string):
    """Perform a flat listing over bucket_url_string."""
    return self.RunGsUtil(['ls', suri(bucket_url_string, '**')],
                          return_stdout=True)

  def test_invalid_args(self):
    """Tests various invalid argument cases."""
    bucket_uri = self.CreateBucket()
    obj1 = self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                             contents='obj1')
    tmpdir = self.CreateTempDir()
    # rsync object to bucket.
    self.RunGsUtil(['rsync', suri(obj1), suri(bucket_uri)], expected_status=1)
    # rsync bucket to object.
    self.RunGsUtil(['rsync', suri(bucket_uri), suri(obj1)], expected_status=1)
    # rsync bucket to non-existent bucket.
    self.RunGsUtil(['rsync', suri(bucket_uri), self.nonexistent_bucket_name],
                   expected_status=1)
    # rsync object to dir.
    self.RunGsUtil(['rsync', suri(obj1), tmpdir], expected_status=1)
    # rsync dir to object.
    self.RunGsUtil(['rsync', tmpdir, suri(obj1)], expected_status=1)
    # rsync dir to non-existent bucket.
    self.RunGsUtil(['rsync', tmpdir, self.nonexistent_bucket_name],
                   expected_status=1)

  # Note: The tests below exercise the cases
  # {src_dir, src_bucket} X {dst_dir, dst_bucket}. We use gsutil rsync -d for
  # all the cases but then have just one test without -d (test_bucket_to_bucket)
  # as representative of handling without the -d option. This provides
  # reasonable test coverage because the -d handling is src/dest URI-type
  # independent, and keeps the test case combinations more manageable.
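  #
  # For reference: without -d, rsync only copies objects that are new or
  # changed in src; with -d it additionally deletes dst objects that have no
  # counterpart in src, so that dst ends up mirroring src.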

  def test_bucket_to_bucket(self):
    """Tests that flat and recursive rsync between 2 buckets works correctly."""
    # Create 2 buckets with 1 overlapping object, 1 extra object at root level
    # in each, and 1 extra object 1 level down in each. Make the overlapping
    # objects named the same but with different content, to test that we detect
    # and properly copy in that case.
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='subdir/obj3',
                      contents='subdir/obj3')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Second bucket should have new objects added from source bucket (without
      # removing extraneous objects found in dest bucket), and without the
      # subdir objects synchronized.
      self.assertEquals(listing2,
                        set(['/obj1', '/obj2', '/obj4', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were correctly synchronized (bucket to bucket sync uses
      # checksums).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket1_uri, 'obj2')], return_stdout=True))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket2_uri, 'obj2')], return_stdout=True))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', suri(bucket1_uri), suri(bucket2_uri)], return_stderr=True))
    _Check2()

    # Now add and remove some objects in each bucket and test rsync -r.
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj6',
                      contents='obj6')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj7',
                      contents='obj7')
    self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')])
    self.RunGsUtil(['rm', suri(bucket2_uri, 'obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      self.RunGsUtil(['rsync', '-r', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Second bucket should have objects that were newly added to first
      # bucket (without removing extraneous dest bucket objects), with the
      # subdir objects synchronized as well (because of -r).
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj4', '/obj6',
                                       '/obj7', '/subdir/obj3',
                                       '/subdir/obj5']))
    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-r', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check4()

  def test_bucket_to_bucket_minus_d(self):
    """Tests that flat and recursive rsync -d between 2 buckets works correctly."""
    # Create 2 buckets with 1 overlapping object, 1 extra object at root level
    # in each, and 1 extra object 1 level down in each. Make the overlapping
    # objects named the same but with different content, to test that we detect
    # and properly copy in that case.
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='subdir/obj3',
                      contents='subdir/obj3')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Second bucket should have content like first bucket but without the
      # subdir objects synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were correctly synchronized (bucket to bucket sync uses
      # checksums).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket1_uri, 'obj2')], return_stdout=True))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket2_uri, 'obj2')], return_stdout=True))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

    # Now add and remove some objects in each bucket and test rsync -r.
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj6',
                      contents='obj6')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj7',
                      contents='obj7')
    self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')])
    self.RunGsUtil(['rm', suri(bucket2_uri, 'obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      self.RunGsUtil(['rsync', '-d', '-r',
                      suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Second bucket should now have content matching the first bucket,
      # including the subdir objects (synchronized because of -r).
      self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check4()

  # Test sequential upload as well as parallel composite upload case.
  @PerformsFileToObjectUpload
  @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                       'Test requires fast crcmod.')
  def test_dir_to_bucket_minus_d(self):
    """Tests that flat and recursive rsync dir to bucket works correctly."""
    # Create dir and bucket with 1 overlapping object, 1 extra object at root
    # level in each, and 1 extra object 1 level down in each. Make the
    # overlapping objects named the same but with different content, to test
    # that we detect and properly copy in that case.
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_uri = self.CreateBucket()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=subdir, file_name='obj3', contents='subdir/obj3')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Need to make sure the bucket listing is caught up, otherwise the
    # first rsync may not see obj2 and overwrite it.
    self.AssertNObjectsInBucket(bucket_uri, 3)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Bucket should have content like dir but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were not synchronized (dir to bucket sync doesn't use checksums
      # unless you specify -c).
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('obj2', '\n'.join(f.readlines()))
      self.assertEquals('OBJ2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', tmpdir, suri(bucket_uri)], return_stderr=True))
    _Check2()

    # Now rerun the sync with the -c option.
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      """Tests rsync -c works as expected."""
      self.RunGsUtil(['rsync', '-d', '-c', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Bucket should have content like dir but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were synchronized (dir to bucket sync with -c uses checksums).
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('obj2', '\n'.join(f.readlines()))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-c', tmpdir, suri(bucket_uri)], return_stderr=True))
    _Check4()

    # Now add and remove some objects in dir and bucket and test rsync -r.
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj6', contents='obj6')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj7',
                      contents='obj7')
    os.unlink(os.path.join(tmpdir, 'obj1'))
    self.RunGsUtil(['rm', suri(bucket_uri, 'obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check5():
      self.RunGsUtil(['rsync', '-d', '-r', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Bucket should now have content matching the dir, including the subdir
      # objects (synchronized because of -r).
      self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
    _Check5()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check6():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-r', tmpdir, suri(bucket_uri)], return_stderr=True))
    _Check6()

  @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                       'Test requires fast crcmod.')
  def test_dir_to_dir_minus_d(self):
    """Tests that flat and recursive rsync dir to dir works correctly."""
    # Create 2 dirs with 1 overlapping file, 1 extra file at root
    # level in each, and 1 extra file 1 level down in each. Make the
    # overlapping files named the same but with different content, to test
    # that we detect and properly copy in that case.
    tmpdir1 = self.CreateTempDir()
    tmpdir2 = self.CreateTempDir()
    subdir1 = os.path.join(tmpdir1, 'subdir1')
    subdir2 = os.path.join(tmpdir2, 'subdir2')
    os.mkdir(subdir1)
    os.mkdir(subdir2)
    self.CreateTempFile(tmpdir=tmpdir1, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir1, file_name='obj2', contents='obj2')
    self.CreateTempFile(
        tmpdir=subdir1, file_name='obj3', contents='subdir1/obj3')
    self.CreateTempFile(tmpdir=tmpdir2, file_name='obj2', contents='OBJ2')
    self.CreateTempFile(tmpdir=tmpdir2, file_name='obj4', contents='obj4')
    self.CreateTempFile(
        tmpdir=subdir2, file_name='obj5', contents='subdir2/obj5')

    self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
    listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
    listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir1/obj3']))
    # dir2 should have content like dir1 but without the subdir1 objects
    # synchronized.
    self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir2/obj5']))
    # Assert that the src/dest objects that had same length but different
    # checksums were not synchronized (dir to dir sync doesn't use checksums
    # unless you specify -c).
    with open(os.path.join(tmpdir1, 'obj2')) as f:
      self.assertEquals('obj2', '\n'.join(f.readlines()))
    with open(os.path.join(tmpdir2, 'obj2')) as f:
      self.assertEquals('OBJ2', '\n'.join(f.readlines()))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True))
    _Check1()

    # Now rerun the sync with the -c option.
    self.RunGsUtil(['rsync', '-d', '-c', tmpdir1, tmpdir2])
    listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
    listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir1/obj3']))
    # dir2 should have content like dir1 but without the subdir objects
    # synchronized.
    self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir2/obj5']))
    # Assert that the src/dest objects that had same length but different
    # content were synchronized (dir to dir sync with -c uses checksums).
    with open(os.path.join(tmpdir1, 'obj2')) as f:
      self.assertEquals('obj2', '\n'.join(f.readlines()))
    with open(os.path.join(tmpdir2, 'obj2')) as f:
      self.assertEquals('obj2', '\n'.join(f.readlines()))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-c', tmpdir1, tmpdir2], return_stderr=True))
    _Check2()

    # Now add and remove some objects in both dirs and test rsync -r.
    self.CreateTempFile(tmpdir=tmpdir1, file_name='obj6', contents='obj6')
    self.CreateTempFile(tmpdir=tmpdir2, file_name='obj7', contents='obj7')
    os.unlink(os.path.join(tmpdir1, 'obj1'))
    os.unlink(os.path.join(tmpdir2, 'obj2'))

    self.RunGsUtil(['rsync', '-d', '-r', tmpdir1, tmpdir2])
    listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
    listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir1/obj3']))
    # dir2 should now have content matching dir1, including the subdir1
    # objects (synchronized because of -r).
    self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir1/obj3']))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-r', tmpdir1, tmpdir2], return_stderr=True))
    _Check3()

  def test_dir_to_dir_minus_d_more_files_than_bufsize(self):
    """Tests concurrently building listing from multiple tmp file ranges."""
    # Create 2 dirs, where each dir has 1000 objects and differing names.
    tmpdir1 = self.CreateTempDir()
    tmpdir2 = self.CreateTempDir()
    for i in range(0, 1000):
      self.CreateTempFile(tmpdir=tmpdir1, file_name='d1-%s' % i, contents='x')
      self.CreateTempFile(tmpdir=tmpdir2, file_name='d2-%s' % i, contents='y')

    # We open a new temp file each time we reach rsync_buffer_lines of
    # listing output. On Windows, this will result in a 'too many open file
    # handles' error, so choose a larger value so as not to open so many files.
    rsync_buffer_config = [('GSUtil', 'rsync_buffer_lines',
                            '50' if IS_WINDOWS else '2')]
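    # SetBotoConfigForTest applies the (section, option, value) overrides
    # above for the duration of the with block below, restoring the prior
    # config values on exit so the tiny buffer size doesn't leak into other
    # tests.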
    # Run gsutil with config option to make buffer size << # files.
    with SetBotoConfigForTest(rsync_buffer_config):
      self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
    listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
    listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
    self.assertEquals(listing1, listing2)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True))
    _Check()

  @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                       'Test requires fast crcmod.')
  def test_bucket_to_dir_minus_d(self):
    """Tests that flat and recursive rsync bucket to dir works correctly."""
    # Create bucket and dir with 1 overlapping object, 1 extra object at root
    # level in each, and 1 extra object 1 level down in each. Make the
    # overlapping objects named the same but with different content, to test
    # that we detect and properly copy in that case.
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj3',
                      contents='subdir/obj3')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='OBJ2')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj4', contents='obj4')
    self.CreateTempFile(tmpdir=subdir, file_name='obj5', contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', suri(bucket_uri), tmpdir])
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Dir should have content like bucket but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were not synchronized (bucket to dir sync doesn't use checksums
      # unless you specify -c).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('OBJ2', '\n'.join(f.readlines()))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket_uri), tmpdir], return_stderr=True))
    _Check2()

    # Now rerun the sync with the -c option.
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      """Tests rsync -c works as expected."""
      self.RunGsUtil(['rsync', '-d', '-c', suri(bucket_uri), tmpdir])
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Dir should have content like bucket but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were synchronized (bucket to dir sync with -c uses checksums).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('obj2', '\n'.join(f.readlines()))
    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-c', suri(bucket_uri), tmpdir], return_stderr=True))
    _Check4()

    # Now add and remove some objects in dir and bucket and test rsync -r.
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj6',
                      contents='obj6')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj7', contents='obj7')
    self.RunGsUtil(['rm', suri(bucket_uri, 'obj1')])
    os.unlink(os.path.join(tmpdir, 'obj2'))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check5():
      self.RunGsUtil(['rsync', '-d', '-r', suri(bucket_uri), tmpdir])
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Dir should now have content matching the bucket, including the subdir
      # objects (synchronized because of -r).
      self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
    _Check5()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check6():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True))
    _Check6()

  def test_bucket_to_dir_minus_d_with_fname_case_change(self):
    """Tests that filename case changes work correctly.

    Example:

    Windows filenames preserve the case in which they were written but are
    compared case-insensitively. If you synchronize from the file system to
    the cloud and then change the case of local filenames, you can end up
    with this situation:

    Cloud copy is called .../TiVo/...
    FS copy is called .../Tivo/...

    Then, if you sync from cloud to FS and rsync doesn't recognize that on
    Windows these names are identical, each rsync run will cause both a copy
    and a delete to be executed.
    """
    # Create bucket and dir with same objects, but dir copy has different name
    # case.
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                      contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='Obj1', contents='obj1')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      output = self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True)
      # Nothing should be copied or removed under Windows.
      if IS_WINDOWS:
        self.assertEquals(NO_CHANGES, output)
      else:
        self.assertNotEquals(NO_CHANGES, output)
    _Check1()

  def test_bucket_to_dir_minus_d_with_leftover_dir_placeholder(self):
    """Tests that we correctly handle leftover dir placeholders.

    See comments in gslib.commands.rsync._FieldedListingIterator for details.
    """
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                      contents='obj1')
    # Create a placeholder like what can be left over by web GUI tools.
    key_uri = bucket_uri.clone_replace_name('/')
    key_uri.set_contents_from_string('')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      output = self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True)
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '//']))
      # Dir should not have the placeholder object.
      self.assertEquals(listing2, set(['/obj1']))
      # Stderr should report what happened.
      self.assertRegexpMatches(output, r'.*Skipping cloud sub-directory.*')
    _Check1()

  @unittest.skipIf(IS_WINDOWS, 'os.symlink() is not available on Windows.')
  def test_rsync_minus_d_minus_e(self):
    """Tests that rsync -e ignores symlinks."""
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_uri = self.CreateBucket()
    fpath1 = self.CreateTempFile(
        tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=subdir, file_name='obj3', contents='subdir/obj3')
    good_symlink_path = os.path.join(tmpdir, 'symlink1')
    os.symlink(fpath1, good_symlink_path)
    # Make a symlink that points to a non-existent path to test that -e also
    # handles that case.
    bad_symlink_path = os.path.join(tmpdir, 'symlink2')
    os.symlink(os.path.join('/', 'non-existent'), bad_symlink_path)
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Ensure listings match the commented expectations."""
      self.RunGsUtil(['rsync', '-d', '-e', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(
          listing1,
          set(['/obj1', '/obj2', '/subdir/obj3', '/symlink1', '/symlink2']))
      # Bucket should have content like dir but without the symlink, and
      # without subdir objects synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
    _Check1()

    # Now remove invalid symlink and run without -e, and see that symlink gets
    # copied (as file to which it points). Use @Retry as hedge against bucket
    # listing eventual consistency.
    os.unlink(bad_symlink_path)
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(
          listing1, set(['/obj1', '/obj2', '/subdir/obj3', '/symlink1']))
      # Bucket should have content like dir but without the symlink, and
      # without subdir objects synchronized.
      self.assertEquals(
          listing2, set(['/obj1', '/obj2', '/subdir/obj5', '/symlink1']))
      self.assertEquals('obj1', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'symlink1')], return_stdout=True))
    _Check2()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', tmpdir, suri(bucket_uri)], return_stderr=True))
    _Check3()

  @SkipForS3('S3 does not support composite objects')
  def test_bucket_to_bucket_minus_d_with_composites(self):
    """Tests that rsync works with composite objects (which don't have MD5s)."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')
    self.RunGsUtil(
        ['compose', suri(bucket1_uri, 'obj1'), suri(bucket1_uri, 'obj2'),
         suri(bucket1_uri, 'obj3')])
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
                      contents='obj4')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/obj3']))
      # Second bucket should have content matching the first bucket.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj3']))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

  def test_bucket_to_bucket_minus_d_empty_dest(self):
    """Tests working with empty dest bucket (dest iter runs out before src iter)."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      self.assertEquals(listing1, set(['/obj1', '/obj2']))
      self.assertEquals(listing2, set(['/obj1', '/obj2']))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

  def test_bucket_to_bucket_minus_d_empty_src(self):
    """Tests working with empty src bucket (src iter runs out before dst iter)."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='obj2')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      stderr = self.RunGsUtil(['ls', suri(bucket1_uri, '**')],
                              expected_status=1, return_stderr=True)
      self.assertIn('One or more URLs matched no objects', stderr)
      stderr = self.RunGsUtil(['ls', suri(bucket2_uri, '**')],
                              expected_status=1, return_stderr=True)
      self.assertIn('One or more URLs matched no objects', stderr)
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

  def test_rsync_minus_d_minus_p(self):
    """Tests that rsync -p preserves ACLs."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    # Set public-read (non-default) ACL so we can verify that rsync -p works.
    self.RunGsUtil(['acl', 'set', 'public-read', suri(bucket1_uri, 'obj1')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync -p works as expected."""
      self.RunGsUtil(['rsync', '-d', '-p', suri(bucket1_uri),
                      suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      self.assertEquals(listing1, set(['/obj1']))
      self.assertEquals(listing2, set(['/obj1']))
      acl1_json = self.RunGsUtil(['acl', 'get', suri(bucket1_uri, 'obj1')],
                                 return_stdout=True)
      acl2_json = self.RunGsUtil(['acl', 'get', suri(bucket2_uri, 'obj1')],
                                 return_stdout=True)
      self.assertEquals(acl1_json, acl2_json)
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-p', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

  def test_rsync_to_nonexistent_bucket_subdir(self):
    """Tests that rsync to non-existent bucket subdir works."""
    # Create dir with some objects and empty bucket.
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_url = self.CreateBucket()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=subdir, file_name='obj3', contents='subdir/obj3')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-r', tmpdir, suri(bucket_url, 'subdir')])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(
          suri(bucket_url, 'subdir'),
          self._FlatListBucket(bucket_url.clone_replace_name('subdir')))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Bucket subdir should have content like dir.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj3']))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-r', tmpdir, suri(bucket_url, 'subdir')],
          return_stderr=True))
    _Check2()

  def test_rsync_from_nonexistent_bucket(self):
    """Tests that rsync from a non-existent bucket fails gracefully."""
    tmpdir = self.CreateTempDir()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    bucket_url_str = '%s://%s' % (
        self.default_provider, self.nonexistent_bucket_name)
    stderr = self.RunGsUtil(['rsync', '-d', bucket_url_str, tmpdir],
                            expected_status=1, return_stderr=True)
    self.assertIn('Caught non-retryable exception', stderr)
    listing = _TailSet(tmpdir, self._FlatListDir(tmpdir))
    # Dir should have un-altered content.
    self.assertEquals(listing, set(['/obj1', '/obj2']))

  def test_rsync_to_nonexistent_bucket(self):
    """Tests that rsync to a non-existent bucket fails gracefully."""
    tmpdir = self.CreateTempDir()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    bucket_url_str = '%s://%s' % (
        self.default_provider, self.nonexistent_bucket_name)
    stderr = self.RunGsUtil(['rsync', '-d', tmpdir, bucket_url_str],
                            expected_status=1, return_stderr=True)
    self.assertIn('Caught non-retryable exception', stderr)
    listing = _TailSet(tmpdir, self._FlatListDir(tmpdir))
    # Dir should have un-altered content.
    self.assertEquals(listing, set(['/obj1', '/obj2']))

  def test_bucket_to_bucket_minus_d_with_overwrite_and_punc_chars(self):
    """Tests that punctuation chars in filenames don't confuse sort order."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    # Create 2 objects in each bucket, with one overwritten with a name that's
    # less than the next name in destination bucket when encoded, but not when
    # compared without encoding.
    self.CreateObject(bucket_uri=bucket1_uri, object_name='e/obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='e-1/obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='e/obj1',
                      contents='OBJ1')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='e-1/obj2',
                      contents='obj2')
    # Need to make sure the bucket listings are caught up, otherwise the
    # rsync may not see all objects and fail to synchronize correctly.
    self.AssertNObjectsInBucket(bucket1_uri, 2)
    self.AssertNObjectsInBucket(bucket2_uri, 2)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-rd', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/e/obj1', '/e-1/obj2']))
      self.assertEquals(listing2, set(['/e/obj1', '/e-1/obj2']))
      # Assert correct contents.
      self.assertEquals('obj1', self.RunGsUtil(
          ['cat', suri(bucket2_uri, 'e/obj1')], return_stdout=True))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket2_uri, 'e-1/obj2')], return_stdout=True))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

  def test_dir_to_bucket_minus_x(self):
    """Tests that rsync -x option works correctly."""
    # Create dir and bucket with 1 overlapping and 2 extra objects in each.
    tmpdir = self.CreateTempDir()
    bucket_uri = self.CreateBucket()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj3', contents='obj3')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj5',
                      contents='obj5')

    # Need to make sure the bucket listing is caught up, otherwise the
    # first rsync may not see obj2 and overwrite it.
    self.AssertNObjectsInBucket(bucket_uri, 3)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', '-x', 'obj[34]', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/obj3']))
      # Bucket should have content like dir but ignoring obj3 from dir and not
      # deleting obj4 from bucket (per exclude regex).
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj4']))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-x', 'obj[34]', tmpdir, suri(bucket_uri)],
          return_stderr=True))
    _Check2()