Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(78)

Side by Side Diff: gslib/tests/test_rsync.py

Issue 698893003: Update checked in version of gsutil to version 4.6 (Closed) Base URL: http://dart.googlecode.com/svn/third_party/gsutil/
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « gslib/tests/test_rm.py ('k') | gslib/tests/test_rsync_funcs.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 # -*- coding: utf-8 -*-
2 # Copyright 2014 Google Inc. All Rights Reserved.
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 """Integration tests for rsync command."""
16
17 import os
18
19 import crcmod
20
21 import gslib.tests.testcase as testcase
22 from gslib.tests.testcase.integration_testcase import SkipForS3
23 from gslib.tests.util import ObjectToURI as suri
24 from gslib.tests.util import PerformsFileToObjectUpload
25 from gslib.tests.util import SetBotoConfigForTest
26 from gslib.tests.util import unittest
27 from gslib.util import IS_WINDOWS
28 from gslib.util import Retry
29 from gslib.util import UsingCrcmodExtension
30
# Exact stderr output rsync emits when it finds nothing to copy or delete;
# used below to assert that re-running an already-synchronized rsync command
# makes no further changes.
NO_CHANGES = 'Building synchronization state...\nStarting synchronization\n'
32
33
34 def _TailSet(start_point, listing):
35 """Returns set of object name tails.
36
37 Tails can be compared between source and dest, past the point at which rsync
38 was done. For example if test ran rsync gs://bucket1/dir gs://bucket2/dir2,
39 the tails for listings from bucket1 would start after "dir", while the tails
40 for listings from bucket2 would start after "dir2".
41
42 Args:
43 start_point: The target of the rsync command, e.g., for the above command it
44 would be gs://bucket1/dir for the bucket1 listing results and
45 gs://bucket2/dir2 for the bucket2 listing results.
46 listing: The listing over which to compute tail.
47
48 Returns:
49 Object name tails.
50 """
51 return set(l[len(start_point):] for l in listing.strip().split('\n'))
52
53
54 class TestRsync(testcase.GsUtilIntegrationTestCase):
55 """Integration tests for rsync command."""
56
57 @staticmethod
58 def _FlatListDir(directory):
59 """Perform a flat listing over directory.
60
61 Args:
62 directory: The directory to list
63
64 Returns:
65 Listings with path separators canonicalized to '/', to make assertions
66 easier for Linux vs Windows.
67 """
68 result = []
69 for dirpath, _, filenames in os.walk(directory):
70 for f in filenames:
71 result.append(os.path.join(dirpath, f))
72 return '\n'.join(result).replace('\\', '/')
73
74 def _FlatListBucket(self, bucket_uri):
75 """Perform a flat listing over bucket_uri."""
76 return self.RunGsUtil(['ls', suri(bucket_uri, '**')], return_stdout=True)
77
78 def test_invalid_args(self):
79 """Tests various invalid argument cases."""
80 bucket_uri = self.CreateBucket()
81 obj1 = self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
82 contents='obj1')
83 tmpdir = self.CreateTempDir()
84 # rsync object to bucket.
85 self.RunGsUtil(['rsync', suri(obj1), suri(bucket_uri)], expected_status=1)
86 # rsync bucket to object.
87 self.RunGsUtil(['rsync', suri(bucket_uri), suri(obj1)], expected_status=1)
88 # rsync bucket to non-existent bucket.
89 self.RunGsUtil(['rsync', suri(bucket_uri), self.nonexistent_bucket_name],
90 expected_status=1)
91 # rsync object to dir.
92 self.RunGsUtil(['rsync', suri(obj1), tmpdir], expected_status=1)
93 # rsync dir to object.
94 self.RunGsUtil(['rsync', tmpdir, suri(obj1)], expected_status=1)
95 # rsync dir to non-existent bucket.
96 self.RunGsUtil(['rsync', tmpdir, suri(obj1), self.nonexistent_bucket_name],
97 expected_status=1)
98
  # Note: The tests below exercise the cases
  # {src_dir, src_bucket} X {dst_dir, dst_bucket}. We use gsutil rsync -d for
  # all the cases but then have just one test without -d (test_bucket_to_bucket)
  # as representative of handling without the -d option. This provides
  # reasonable test coverage because the -d handling is src/dest URI-type
  # independent, and keeps the test case combinations more manageable.
105
  def test_bucket_to_bucket(self):
    """Tests that flat and recursive rsync between 2 buckets works correctly."""
    # Create 2 buckets with 1 overlapping object, 1 extra object at root level
    # in each, and 1 extra object 1 level down in each. Make the overlapping
    # objects named the same but with different content, to test that we detect
    # and properly copy in that case.
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='subdir/obj3',
                      contents='subdir/obj3')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Second bucket should have new objects added from source bucket
      # (without removing the extraneous object found in the dest bucket,
      # since -d was not used), and without the subdir objects synchronized
      # (rsync is not recursive without -r).
      self.assertEquals(listing2,
                        set(['/obj1', '/obj2', '/obj4', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were correctly synchronized (bucket to bucket sync uses
      # checksums).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket1_uri, 'obj2')], return_stdout=True))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket2_uri, 'obj2')], return_stdout=True))
    _Check1()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', suri(bucket1_uri), suri(bucket2_uri)], return_stderr=True))

    # Now add and remove some objects in each bucket and test rsync -r.
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj6',
                      contents='obj6')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj7',
                      contents='obj7')
    self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')])
    self.RunGsUtil(['rm', suri(bucket2_uri, 'obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      self.RunGsUtil(['rsync', '-r', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Second bucket should have the objects that were newly added to the
      # first bucket (without removing extraneous dest bucket objects), now
      # including the subdir objects because -r makes the sync recursive.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj4', '/obj6',
                                       '/obj7', '/subdir/obj3',
                                       '/subdir/obj5']))
    _Check2()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-r', suri(bucket1_uri), suri(bucket2_uri)],
        return_stderr=True))
182
  def test_bucket_to_bucket_minus_d(self):
    """Tests flat and recursive rsync -d between 2 buckets.

    With -d, destination objects not present in the source are deleted, so
    the destination ends up mirroring the source (at the synced depth).
    """
    # Create 2 buckets with 1 overlapping object, 1 extra object at root level
    # in each, and 1 extra object 1 level down in each. Make the overlapping
    # objects named the same but with different content, to test that we detect
    # and properly copy in that case.
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='subdir/obj3',
                      contents='subdir/obj3')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Second bucket should have content like first bucket (obj4 removed by
      # -d) but without the subdir objects synchronized (not recursive).
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were correctly synchronized (bucket to bucket sync uses
      # checksums).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket1_uri, 'obj2')], return_stdout=True))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket2_uri, 'obj2')], return_stdout=True))
    _Check1()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
        return_stderr=True))

    # Now add and remove some objects in each bucket and test rsync -r.
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj6',
                      contents='obj6')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj7',
                      contents='obj7')
    self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')])
    self.RunGsUtil(['rm', suri(bucket2_uri, 'obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      self.RunGsUtil(['rsync', '-d', '-r',
                      suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # With -d -r, second bucket should now exactly mirror the first,
      # including subdir objects (obj7 and subdir/obj5 deleted).
      self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
    _Check2()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', '-r', suri(bucket1_uri), suri(bucket2_uri)],
        return_stderr=True))
256
  # Test sequential upload as well as parallel composite upload case.
  @PerformsFileToObjectUpload
  @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                       'Test requires fast crcmod.')
  def test_dir_to_bucket_minus_d(self):
    """Tests that flat and recursive rsync -d from dir to bucket works."""
    # Create dir and bucket with 1 overlapping object, 1 extra object at root
    # level in each, and 1 extra object 1 level down in each. Make the
    # overlapping objects named the same but with different content, to test
    # that we detect and properly copy in that case.
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_uri = self.CreateBucket()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=subdir, file_name='obj3', contents='subdir/obj3')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Need to make sure the bucket listing is caught-up, otherwise the
    # first rsync may not see obj2 and overwrite it.
    self.AssertNObjectsInBucket(bucket_uri, 3)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Bucket should have content like dir but without the subdir objects
      # synchronized (not recursive without -r).
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were not synchronized (dir to bucket sync doesn't use checksums
      # unless you specify -c).
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('obj2', '\n'.join(f.readlines()))
      self.assertEquals('OBJ2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
    _Check1()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', tmpdir, suri(bucket_uri)], return_stderr=True))

    # Now rerun the sync with the -c option.
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      """Tests rsync -c works as expected."""
      self.RunGsUtil(['rsync', '-d', '-c', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Bucket should have content like dir but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were synchronized (dir to bucket sync with -c uses checksums).
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('obj2', '\n'.join(f.readlines()))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
    _Check2()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', '-c', tmpdir, suri(bucket_uri)], return_stderr=True))

    # Now add and remove some objects in dir and bucket and test rsync -r.
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj6', contents='obj6')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj7',
                      contents='obj7')
    os.unlink(os.path.join(tmpdir, 'obj1'))
    self.RunGsUtil(['rm', suri(bucket_uri, 'obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      self.RunGsUtil(['rsync', '-d', '-r', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # With -d -r, bucket should now exactly mirror the dir, including
      # subdir objects.
      self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
    _Check3()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', '-r', tmpdir, suri(bucket_uri)], return_stderr=True))
358
359 @unittest.skipUnless(UsingCrcmodExtension(crcmod),
360 'Test requires fast crcmod.')
361 def test_dir_to_dir_minus_d(self):
362 """Tests that flat and recursive rsync dir to dir works correctly."""
363 # Create 2 dirs with 1 overlapping file, 1 extra file at root
364 # level in each, and 1 extra file 1 level down in each. Make the
365 # overlapping files named the same but with different content, to test
366 # that we detect and properly copy in that case.
367 tmpdir1 = self.CreateTempDir()
368 tmpdir2 = self.CreateTempDir()
369 subdir1 = os.path.join(tmpdir1, 'subdir1')
370 subdir2 = os.path.join(tmpdir2, 'subdir2')
371 os.mkdir(subdir1)
372 os.mkdir(subdir2)
373 self.CreateTempFile(tmpdir=tmpdir1, file_name='obj1', contents='obj1')
374 self.CreateTempFile(tmpdir=tmpdir1, file_name='obj2', contents='obj2')
375 self.CreateTempFile(
376 tmpdir=subdir1, file_name='obj3', contents='subdir1/obj3')
377 self.CreateTempFile(tmpdir=tmpdir2, file_name='obj2', contents='OBJ2')
378 self.CreateTempFile(tmpdir=tmpdir2, file_name='obj4', contents='obj4')
379 self.CreateTempFile(
380 tmpdir=subdir2, file_name='obj5', contents='subdir2/obj5')
381
382 self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
383 listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
384 listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
385 # dir1 should have un-altered content.
386 self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir1/obj3']))
387 # dir2 should have content like dir1 but without the subdir1 objects
388 # synchronized.
389 self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir2/obj5']))
390 # Assert that the src/dest objects that had same length but different
391 # checksums were not synchronized (dir to dir sync doesn't use checksums
392 # unless you specify -c).
393 with open(os.path.join(tmpdir1, 'obj2')) as f:
394 self.assertEquals('obj2', '\n'.join(f.readlines()))
395 with open(os.path.join(tmpdir2, 'obj2')) as f:
396 self.assertEquals('OBJ2', '\n'.join(f.readlines()))
397
398 # Check that re-running the same rsync command causes no more changes.
399 self.assertEquals(NO_CHANGES, self.RunGsUtil(
400 ['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True))
401
402 # Now rerun the sync with the -c option.
403 self.RunGsUtil(['rsync', '-d', '-c', tmpdir1, tmpdir2])
404 listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
405 listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
406 # dir1 should have un-altered content.
407 self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir1/obj3']))
408 # dir2 should have content like dir but without the subdir objects
409 # synchronized.
410 self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir2/obj5']))
411 # Assert that the src/dest objects that had same length but different
412 # content were synchronized (dir to dir sync with -c uses checksums).
413 with open(os.path.join(tmpdir1, 'obj2')) as f:
414 self.assertEquals('obj2', '\n'.join(f.readlines()))
415 with open(os.path.join(tmpdir1, 'obj2')) as f:
416 self.assertEquals('obj2', '\n'.join(f.readlines()))
417
418 # Check that re-running the same rsync command causes no more changes.
419 self.assertEquals(NO_CHANGES, self.RunGsUtil(
420 ['rsync', '-d', '-c', tmpdir1, tmpdir2], return_stderr=True))
421
422 # Now add and remove some objects in both dirs and test rsync -r.
423 self.CreateTempFile(tmpdir=tmpdir1, file_name='obj6', contents='obj6')
424 self.CreateTempFile(tmpdir=tmpdir2, file_name='obj7', contents='obj7')
425 os.unlink(os.path.join(tmpdir1, 'obj1'))
426 os.unlink(os.path.join(tmpdir2, 'obj2'))
427
428 self.RunGsUtil(['rsync', '-d', '-r', tmpdir1, tmpdir2])
429 listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
430 listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
431 # dir1 should have un-altered content.
432 self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir1/obj3']))
433 # dir2 should have content like dir but without the subdir objects
434 # synchronized.
435 self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir1/obj3']))
436
437 # Check that re-running the same rsync command causes no more changes.
438 self.assertEquals(NO_CHANGES, self.RunGsUtil(
439 ['rsync', '-d', '-r', tmpdir1, tmpdir2], return_stderr=True))
440
441 def test_dir_to_dir_minus_d_more_files_than_bufsize(self):
442 """Tests concurrently building listing from multiple tmp file ranges."""
443 # Create 2 dirs, where each dir has 1000 objects and differing names.
444 tmpdir1 = self.CreateTempDir()
445 tmpdir2 = self.CreateTempDir()
446 for i in range(0, 1000):
447 self.CreateTempFile(tmpdir=tmpdir1, file_name='d1-%s' %i, contents='x')
448 self.CreateTempFile(tmpdir=tmpdir2, file_name='d2-%s' %i, contents='y')
449
450 # We open a new temp file each time we reach rsync_buffer_lines of
451 # listing output. On Windows, this will result in a 'too many open file
452 # handles' error, so choose a larger value so as not to open so many files.
453 rsync_buffer_config = [('GSUtil', 'rsync_buffer_lines',
454 '50' if IS_WINDOWS else '2')]
455 # Run gsutil with config option to make buffer size << # files.
456 with SetBotoConfigForTest(rsync_buffer_config):
457 self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
458 listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
459 listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
460 self.assertEquals(listing1, listing2)
461
462 # Check that re-running the same rsync command causes no more changes.
463 self.assertEquals(NO_CHANGES, self.RunGsUtil(
464 ['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True))
465
  @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                       'Test requires fast crcmod.')
  def test_bucket_to_dir_minus_d(self):
    """Tests that flat and recursive rsync -d from bucket to dir works."""
    # Create bucket and dir with 1 overlapping object, 1 extra object at root
    # level in each, and 1 extra object 1 level down in each. Make the
    # overlapping objects named the same but with different content, to test
    # that we detect and properly copy in that case.
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj3',
                      contents='subdir/obj3')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='OBJ2')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj4', contents='obj4')
    self.CreateTempFile(tmpdir=subdir, file_name='obj5', contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', suri(bucket_uri), tmpdir])
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Dir should have content like bucket but without the subdir objects
      # synchronized (not recursive without -r).
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were not synchronized (bucket to dir sync doesn't use checksums
      # unless you specify -c).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('OBJ2', '\n'.join(f.readlines()))
    _Check1()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', suri(bucket_uri), tmpdir], return_stderr=True))

    # Now rerun the sync with the -c option.
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      """Tests rsync -c works as expected."""
      self.RunGsUtil(['rsync', '-d', '-c', suri(bucket_uri), tmpdir])
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Dir should have content like bucket but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were synchronized (bucket to dir sync with -c uses checksums).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('obj2', '\n'.join(f.readlines()))
    _Check2()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', '-c', suri(bucket_uri), tmpdir], return_stderr=True))

    # Now add and remove some objects in dir and bucket and test rsync -r.
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj6',
                      contents='obj6')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj7', contents='obj7')
    self.RunGsUtil(['rm', suri(bucket_uri, 'obj1')])
    os.unlink(os.path.join(tmpdir, 'obj2'))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      self.RunGsUtil(['rsync', '-d', '-r', suri(bucket_uri), tmpdir])
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # With -d -r, dir should now exactly mirror the bucket, including
      # subdir objects.
      self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
    _Check3()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True))
561
  def test_bucket_to_dir_minus_d_with_fname_case_change(self):
    """Tests that name case changes work correctly.

    Example:

    Windows filenames are case-preserving in what you wrote, but case-
    insensitive when compared. If you synchronize from FS to cloud and then
    change case-naming in local files, you could end up with this situation:

    Cloud copy is called .../TiVo/...
    FS copy is called .../Tivo/...

    Then, if you sync from cloud to FS, if rsync doesn't recognize that on
    Windows these names are identical, each rsync run will cause both a copy
    and a delete to be executed.
    """
    # Create bucket and dir with same objects, but dir copy has different name
    # case.
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                      contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='Obj1', contents='obj1')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      output = self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True)
      # Nothing should be copied or removed under Windows ('obj1' and 'Obj1'
      # compare equal there); elsewhere the names differ, so work is done.
      if IS_WINDOWS:
        self.assertEquals(NO_CHANGES, output)
      else:
        self.assertNotEquals(NO_CHANGES, output)
    _Check1()
598
  def test_bucket_to_dir_minus_d_with_leftover_dir_placeholder(self):
    """Tests that we correctly handle leftover dir placeholders.

    See comments in gslib.commands.rsync._FieldedListingIterator for details.
    """
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                      contents='obj1')
    # Create a placeholder like what can be left over by web GUI tools:
    # a zero-length object whose name ends in '/'.
    key_uri = bucket_uri.clone_replace_name('/')
    key_uri.set_contents_from_string('')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      output = self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True)
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content (including the '//' tail for
      # the placeholder object).
      self.assertEquals(listing1, set(['/obj1', '//']))
      # Dir (the rsync destination) should not have the placeholder object.
      self.assertEquals(listing2, set(['/obj1']))
      # Stderr output should report that the placeholder was skipped.
      self.assertRegexpMatches(output, r'.*Skipping cloud sub-directory.*')
    _Check1()
627
  @unittest.skipIf(IS_WINDOWS, 'os.symlink() is not available on Windows.')
  def test_rsync_minus_d_minus_e(self):
    """Tests that rsync -e ignores symlinks (valid and broken alike)."""
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_uri = self.CreateBucket()
    fpath1 = self.CreateTempFile(
        tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=subdir, file_name='obj3', contents='subdir/obj3')
    good_symlink_path = os.path.join(tmpdir, 'symlink1')
    os.symlink(fpath1, good_symlink_path)
    # Make a symlink that points to a non-existent path to test that -e also
    # handles that case.
    bad_symlink_path = os.path.join(tmpdir, 'symlink2')
    os.symlink(os.path.join('/', 'non-existent'), bad_symlink_path)
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Ensure listings match the commented expectations."""
      self.RunGsUtil(['rsync', '-d', '-e', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(
          listing1,
          set(['/obj1', '/obj2', '/subdir/obj3', '/symlink1', '/symlink2']))
      # Bucket should have content like dir but without the symlinks (-e
      # excludes them), and without subdir objects synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
    _Check1()

    # Now remove invalid symlink and run without -e, and see that symlink gets
    # copied (as file to which it points). Use @Retry as hedge against bucket
    # listing eventual consistency.
    os.unlink(bad_symlink_path)
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(
          listing1, set(['/obj1', '/obj2', '/subdir/obj3', '/symlink1']))
      # Bucket should now include symlink1 (followed, since -e was omitted),
      # still without subdir objects synchronized.
      self.assertEquals(
          listing2, set(['/obj1', '/obj2', '/subdir/obj5', '/symlink1']))
      # The uploaded symlink1 object holds the content of the file it
      # pointed to (obj1), not the link itself.
      self.assertEquals('obj1', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'symlink1')], return_stdout=True))
    _Check2()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', tmpdir, suri(bucket_uri)], return_stderr=True))
692
693 @SkipForS3('S3 does not support composite objects')
694 def test_bucket_to_bucket_minus_d_with_composites(self):
695 """Tests that rsync works with composite objects (which don't have MD5s)."""
696 bucket1_uri = self.CreateBucket()
697 bucket2_uri = self.CreateBucket()
698 self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
699 contents='obj1')
700 self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
701 contents='obj2')
702 self.RunGsUtil(
703 ['compose', suri(bucket1_uri, 'obj1'), suri(bucket1_uri, 'obj2'),
704 suri(bucket1_uri, 'obj3')])
705 self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
706 contents='OBJ2')
707 self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
708 contents='obj4')
709
710 # Use @Retry as hedge against bucket listing eventual consistency.
711 @Retry(AssertionError, tries=3, timeout_secs=1)
712 def _Check():
713 self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
714 listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
715 listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
716 # First bucket should have un-altered content.
717 self.assertEquals(listing1, set(['/obj1', '/obj2', '/obj3']))
718 # Second bucket should have content like first bucket but without the
719 # subdir objects synchronized.
720 self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj3']))
721 _Check()
722
723 # Check that re-running the same rsync command causes no more changes.
724 self.assertEquals(NO_CHANGES, self.RunGsUtil(
725 ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
726 return_stderr=True))
727
728 def test_bucket_to_bucket_minus_d_empty_dest(self):
729 """Tests working with empty dest bucket (iter runs out before src iter)."""
730 bucket1_uri = self.CreateBucket()
731 bucket2_uri = self.CreateBucket()
732 self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
733 contents='obj1')
734 self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
735 contents='obj2')
736
737 # Use @Retry as hedge against bucket listing eventual consistency.
738 @Retry(AssertionError, tries=3, timeout_secs=1)
739 def _Check():
740 self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
741 listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
742 listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
743 self.assertEquals(listing1, set(['/obj1', '/obj2']))
744 self.assertEquals(listing2, set(['/obj1', '/obj2']))
745 _Check()
746
747 # Check that re-running the same rsync command causes no more changes.
748 self.assertEquals(NO_CHANGES, self.RunGsUtil(
749 ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
750 return_stderr=True))
751
752 def test_bucket_to_bucket_minus_d_empty_src(self):
753 """Tests working with empty src bucket (iter runs out before dst iter)."""
754 bucket1_uri = self.CreateBucket()
755 bucket2_uri = self.CreateBucket()
756 self.CreateObject(bucket_uri=bucket2_uri, object_name='obj1',
757 contents='obj1')
758 self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
759 contents='obj2')
760
761 # Use @Retry as hedge against bucket listing eventual consistency.
762 @Retry(AssertionError, tries=3, timeout_secs=1)
763 def _Check():
764 self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
765 stderr = self.RunGsUtil(['ls', suri(bucket1_uri, '**')],
766 expected_status=1, return_stderr=True)
767 self.assertIn('One or more URLs matched no objects', stderr)
768 stderr = self.RunGsUtil(['ls', suri(bucket2_uri, '**')],
769 expected_status=1, return_stderr=True)
770 self.assertIn('One or more URLs matched no objects', stderr)
771 _Check()
772
773 # Check that re-running the same rsync command causes no more changes.
774 self.assertEquals(NO_CHANGES, self.RunGsUtil(
775 ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
776 return_stderr=True))
777
778 def test_rsync_minus_d_minus_p(self):
779 """Tests that rsync -p preserves ACLs."""
780 bucket1_uri = self.CreateBucket()
781 bucket2_uri = self.CreateBucket()
782 self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
783 contents='obj1')
784 # Set public-read (non-default) ACL so we can verify that rsync -p works.
785 self.RunGsUtil(['acl', 'set', 'public-read', suri(bucket1_uri, 'obj1')])
786
787 # Use @Retry as hedge against bucket listing eventual consistency.
788 @Retry(AssertionError, tries=3, timeout_secs=1)
789 def _Check():
790 """Tests rsync -p works as expected."""
791 self.RunGsUtil(['rsync', '-d', '-p', suri(bucket1_uri),
792 suri(bucket2_uri)])
793 listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
794 listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
795 self.assertEquals(listing1, set(['/obj1']))
796 self.assertEquals(listing2, set(['/obj1']))
797 acl1_json = self.RunGsUtil(['acl', 'get', suri(bucket1_uri, 'obj1')],
798 return_stdout=True)
799 acl2_json = self.RunGsUtil(['acl', 'get', suri(bucket2_uri, 'obj1')],
800 return_stdout=True)
801 self.assertEquals(acl1_json, acl2_json)
802 _Check()
803
804 # Check that re-running the same rsync command causes no more changes.
805 self.assertEquals(NO_CHANGES, self.RunGsUtil(
806 ['rsync', '-d', '-p', suri(bucket1_uri), suri(bucket2_uri)],
807 return_stderr=True))
OLDNEW
« no previous file with comments | « gslib/tests/test_rm.py ('k') | gslib/tests/test_rsync_funcs.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698