OLD | NEW |
(Empty) | |
| 1 # -*- coding: utf-8 -*- |
| 2 # Copyright 2014 Google Inc. All Rights Reserved. |
| 3 # |
| 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 # you may not use this file except in compliance with the License. |
| 6 # You may obtain a copy of the License at |
| 7 # |
| 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 # |
| 10 # Unless required by applicable law or agreed to in writing, software |
| 11 # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 # See the License for the specific language governing permissions and |
| 14 # limitations under the License. |
| 15 """Integration tests for rsync command.""" |
| 16 |
| 17 import os |
| 18 |
| 19 import crcmod |
| 20 |
| 21 import gslib.tests.testcase as testcase |
| 22 from gslib.tests.testcase.integration_testcase import SkipForS3 |
| 23 from gslib.tests.util import ObjectToURI as suri |
| 24 from gslib.tests.util import PerformsFileToObjectUpload |
| 25 from gslib.tests.util import SetBotoConfigForTest |
| 26 from gslib.tests.util import unittest |
| 27 from gslib.util import IS_WINDOWS |
| 28 from gslib.util import Retry |
| 29 from gslib.util import UsingCrcmodExtension |
| 30 |
| 31 NO_CHANGES = 'Building synchronization state...\nStarting synchronization\n' |
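| # (NO_CHANGES is the complete stderr output of a no-op rsync run; tests below
| # compare stderr against it to assert that re-running a sync changed nothing.)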
| 32 |
| 33 |
| 34 def _TailSet(start_point, listing): |
| 35 """Returns set of object name tails. |
| 36 |
| 37 Tails (the part of each name past the rsync root) can be compared between
| 38 source and dest. For example, if the test ran rsync gs://bucket1/dir
| 39 gs://bucket2/dir2, the tails for listings from bucket1 would start after
| 40 "dir", while the tails for listings from bucket2 would start after "dir2".
| 41 |
| 42 Args: |
| 43 start_point: The target of the rsync command, e.g., for the above command it |
| 44 would be gs://bucket1/dir for the bucket1 listing results and |
| 45 gs://bucket2/dir2 for the bucket2 listing results. |
| 46 listing: The listing over which to compute tail. |
| 47 |
| 48 Returns: |
| 49 Object name tails. |
| 50 """ |
| 51 return set(l[len(start_point):] for l in listing.strip().split('\n')) |
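| # For example (an illustrative sketch, not values from any test run):
| # _TailSet('gs://bucket1/dir',
| #          'gs://bucket1/dir/obj1\ngs://bucket1/dir/sub/obj2')
| # returns set(['/obj1', '/sub/obj2']), which can be compared directly against
| # the tail set computed from the corresponding gs://bucket2/dir2 listing.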
| 52 |
| 53 # TODO: Add inspection to the retry wrappers in this test suite where the state |
| 54 # at the end of a retry block is depended upon by subsequent tests (since |
| 55 # listing content can vary depending on which backend server is reached until |
| 56 # eventual consistency is reached). |
| 57 # TODO: Remove retry wrappers and AssertNObjectsInBucket calls if GCS ever |
| 58 # supports strong listing consistency. |
| 59 class TestRsync(testcase.GsUtilIntegrationTestCase): |
| 60 """Integration tests for rsync command.""" |
| 61 |
| 62 @staticmethod |
| 63 def _FlatListDir(directory): |
| 64 """Perform a flat listing over directory. |
| 65 |
| 66 Args: |
| 67 directory: The directory to list |
| 68 |
| 69 Returns: |
| 70 Listings with path separators canonicalized to '/', to make assertions |
| 71 easier for Linux vs Windows. |
| 72 """ |
| 73 result = [] |
| 74 for dirpath, _, filenames in os.walk(directory): |
| 75 for f in filenames: |
| 76 result.append(os.path.join(dirpath, f)) |
| 77 return '\n'.join(result).replace('\\', '/') |
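| # For example (hypothetical Windows values): os.walk may yield
| # 'C:\tmp\dir\obj1', which the replace() above canonicalizes to
| # 'C:/tmp/dir/obj1' so tails match '/'-separated bucket listings.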
| 78 |
| 79 def _FlatListBucket(self, bucket_url_string): |
| 80 """Perform a flat listing over bucket_url_string.""" |
| 81 return self.RunGsUtil(['ls', suri(bucket_url_string, '**')], |
| 82 return_stdout=True) |
| 83 |
| 84 def test_invalid_args(self): |
| 85 """Tests various invalid argument cases.""" |
| 86 bucket_uri = self.CreateBucket() |
| 87 obj1 = self.CreateObject(bucket_uri=bucket_uri, object_name='obj1', |
| 88 contents='obj1') |
| 89 tmpdir = self.CreateTempDir() |
| 90 # rsync object to bucket. |
| 91 self.RunGsUtil(['rsync', suri(obj1), suri(bucket_uri)], expected_status=1) |
| 92 # rsync bucket to object. |
| 93 self.RunGsUtil(['rsync', suri(bucket_uri), suri(obj1)], expected_status=1) |
| 94 # rsync bucket to non-existent bucket. |
| 95 self.RunGsUtil(['rsync', suri(bucket_uri), self.nonexistent_bucket_name], |
| 96 expected_status=1) |
| 97 # rsync object to dir. |
| 98 self.RunGsUtil(['rsync', suri(obj1), tmpdir], expected_status=1) |
| 99 # rsync dir to object. |
| 100 self.RunGsUtil(['rsync', tmpdir, suri(obj1)], expected_status=1) |
| 101 # rsync dir to non-existent bucket. |
| 102 self.RunGsUtil(['rsync', tmpdir, self.nonexistent_bucket_name],
| 103 expected_status=1)
| 104 |
| 105 # Note: The tests below exercise the cases |
| 106 # {src_dir, src_bucket} X {dst_dir, dst_bucket}. We use gsutil rsync -d for |
| 107 # all the cases but then have just one test without -d (test_bucket_to_bucket) |
| 108 # as representative of handling without the -d option. This provides |
| 109 # reasonable test coverage because the -d handling is src/dest URI-type
| 110 # independent, and keeps the test case combinations more manageable. |
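| # For reference, those four cases are covered by test_bucket_to_bucket_minus_d,
| # test_dir_to_bucket_minus_d, test_dir_to_dir_minus_d, and
| # test_bucket_to_dir_minus_d below.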
| 111 |
| 112 def test_bucket_to_bucket(self): |
| 113 """Tests that flat and recursive rsync between 2 buckets works correctly.""" |
| 114 # Create 2 buckets with 1 overlapping object, 1 extra object at root level |
| 115 # in each, and 1 extra object 1 level down in each. Make the overlapping |
| 116 # objects named the same but with different content, to test that we detect |
| 117 # and properly copy in that case. |
| 118 bucket1_uri = self.CreateBucket() |
| 119 bucket2_uri = self.CreateBucket() |
| 120 self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1', |
| 121 contents='obj1') |
| 122 self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2', |
| 123 contents='obj2') |
| 124 self.CreateObject(bucket_uri=bucket1_uri, object_name='subdir/obj3', |
| 125 contents='subdir/obj3') |
| 126 self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2', |
| 127 contents='OBJ2') |
| 128 self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4', |
| 129 contents='obj4') |
| 130 self.CreateObject(bucket_uri=bucket2_uri, object_name='subdir/obj5', |
| 131 contents='subdir/obj5') |
| 132 |
| 133 # Use @Retry as hedge against bucket listing eventual consistency. |
| 134 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 135 def _Check1(): |
| 136 """Tests rsync works as expected.""" |
| 137 self.RunGsUtil(['rsync', suri(bucket1_uri), suri(bucket2_uri)]) |
| 138 listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri)) |
| 139 listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri)) |
| 140 # First bucket should have un-altered content. |
| 141 self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3'])) |
| 142 # Second bucket should have new objects added from source bucket (without |
| 143 # removing extraneous objects found in the dest bucket), and without the
| 144 # subdir objects synchronized. |
| 145 self.assertEquals(listing2, |
| 146 set(['/obj1', '/obj2', '/obj4', '/subdir/obj5'])) |
| 147 # Assert that the src/dest objects that had same length but different |
| 148 # content were correctly synchronized (bucket to bucket sync uses |
| 149 # checksums). |
| 150 self.assertEquals('obj2', self.RunGsUtil( |
| 151 ['cat', suri(bucket1_uri, 'obj2')], return_stdout=True)) |
| 152 self.assertEquals('obj2', self.RunGsUtil( |
| 153 ['cat', suri(bucket2_uri, 'obj2')], return_stdout=True)) |
| 154 _Check1() |
| 155 |
| 156 # Use @Retry as hedge against bucket listing eventual consistency. |
| 157 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 158 def _Check2(): |
| 159 # Check that re-running the same rsync command causes no more changes. |
| 160 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 161 ['rsync', suri(bucket1_uri), suri(bucket2_uri)], return_stderr=True)) |
| 162 _Check2() |
| 163 |
| 164 # Now add and remove some objects in each bucket and test rsync -r. |
| 165 self.CreateObject(bucket_uri=bucket1_uri, object_name='obj6', |
| 166 contents='obj6') |
| 167 self.CreateObject(bucket_uri=bucket2_uri, object_name='obj7', |
| 168 contents='obj7') |
| 169 self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')]) |
| 170 self.RunGsUtil(['rm', suri(bucket2_uri, 'obj2')]) |
| 171 |
| 172 # Use @Retry as hedge against bucket listing eventual consistency. |
| 173 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 174 def _Check3(): |
| 175 self.RunGsUtil(['rsync', '-r', suri(bucket1_uri), suri(bucket2_uri)]) |
| 176 listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri)) |
| 177 listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri)) |
| 178 # First bucket should have un-altered content. |
| 179 self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3'])) |
| 180 # Second bucket should have objects that were newly added to first bucket
| 181 # (without removing extraneous dest bucket objects), and with the subdir
| 182 # objects synchronized (since -r was used).
| 183 self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj4', '/obj6', |
| 184 '/obj7', '/subdir/obj3', |
| 185 '/subdir/obj5'])) |
| 186 _Check3() |
| 187 |
| 188 # Use @Retry as hedge against bucket listing eventual consistency. |
| 189 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 190 def _Check4(): |
| 191 # Check that re-running the same rsync command causes no more changes. |
| 192 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 193 ['rsync', '-r', suri(bucket1_uri), suri(bucket2_uri)], |
| 194 return_stderr=True)) |
| 195 _Check4() |
| 196 |
| 197 def test_bucket_to_bucket_minus_d(self): |
| 198 """Tests that flat and recursive rsync between 2 buckets works correctly.""" |
| 199 # Create 2 buckets with 1 overlapping object, 1 extra object at root level |
| 200 # in each, and 1 extra object 1 level down in each. Make the overlapping |
| 201 # objects named the same but with different content, to test that we detect |
| 202 # and properly copy in that case. |
| 203 bucket1_uri = self.CreateBucket() |
| 204 bucket2_uri = self.CreateBucket() |
| 205 self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1', |
| 206 contents='obj1') |
| 207 self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2', |
| 208 contents='obj2') |
| 209 self.CreateObject(bucket_uri=bucket1_uri, object_name='subdir/obj3', |
| 210 contents='subdir/obj3') |
| 211 self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2', |
| 212 contents='OBJ2') |
| 213 self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4', |
| 214 contents='obj4') |
| 215 self.CreateObject(bucket_uri=bucket2_uri, object_name='subdir/obj5', |
| 216 contents='subdir/obj5') |
| 217 |
| 218 # Use @Retry as hedge against bucket listing eventual consistency. |
| 219 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 220 def _Check1(): |
| 221 """Tests rsync works as expected.""" |
| 222 self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)]) |
| 223 listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri)) |
| 224 listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri)) |
| 225 # First bucket should have un-altered content. |
| 226 self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3'])) |
| 227 # Second bucket should have content like first bucket but without the |
| 228 # subdir objects synchronized. |
| 229 self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5'])) |
| 230 # Assert that the src/dest objects that had same length but different |
| 231 # content were correctly synchronized (bucket to bucket sync uses |
| 232 # checksums). |
| 233 self.assertEquals('obj2', self.RunGsUtil( |
| 234 ['cat', suri(bucket1_uri, 'obj2')], return_stdout=True)) |
| 235 self.assertEquals('obj2', self.RunGsUtil( |
| 236 ['cat', suri(bucket2_uri, 'obj2')], return_stdout=True)) |
| 237 _Check1() |
| 238 |
| 239 # Use @Retry as hedge against bucket listing eventual consistency. |
| 240 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 241 def _Check2(): |
| 242 # Check that re-running the same rsync command causes no more changes. |
| 243 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 244 ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)], |
| 245 return_stderr=True)) |
| 246 _Check2() |
| 247 |
| 248 # Now add and remove some objects in each bucket and test rsync -d -r.
| 249 self.CreateObject(bucket_uri=bucket1_uri, object_name='obj6', |
| 250 contents='obj6') |
| 251 self.CreateObject(bucket_uri=bucket2_uri, object_name='obj7', |
| 252 contents='obj7') |
| 253 self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')]) |
| 254 self.RunGsUtil(['rm', suri(bucket2_uri, 'obj2')]) |
| 255 |
| 256 # Use @Retry as hedge against bucket listing eventual consistency. |
| 257 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 258 def _Check3(): |
| 259 self.RunGsUtil(['rsync', '-d', '-r', |
| 260 suri(bucket1_uri), suri(bucket2_uri)]) |
| 261 listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri)) |
| 262 listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri)) |
| 263 # First bucket should have un-altered content. |
| 264 self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3'])) |
| 265 # Second bucket should now have content identical to the first bucket,
| 266 # including the subdir objects (since -r was used).
| 267 self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3'])) |
| 268 _Check3() |
| 269 |
| 270 # Use @Retry as hedge against bucket listing eventual consistency. |
| 271 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 272 def _Check4(): |
| 273 # Check that re-running the same rsync command causes no more changes. |
| 274 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 275 ['rsync', '-d', '-r', suri(bucket1_uri), suri(bucket2_uri)], |
| 276 return_stderr=True)) |
| 277 _Check4() |
| 278 |
| 279 # Test sequential upload as well as parallel composite upload case. |
| 280 @PerformsFileToObjectUpload |
| 281 @unittest.skipUnless(UsingCrcmodExtension(crcmod), |
| 282 'Test requires fast crcmod.') |
| 283 def test_dir_to_bucket_minus_d(self): |
| 284 """Tests that flat and recursive rsync dir to bucket works correctly.""" |
| 285 # Create dir and bucket with 1 overlapping object, 1 extra object at root |
| 286 # level in each, and 1 extra object 1 level down in each. Make the |
| 287 # overlapping objects named the same but with different content, to test |
| 288 # that we detect and properly copy in that case. |
| 289 tmpdir = self.CreateTempDir() |
| 290 subdir = os.path.join(tmpdir, 'subdir') |
| 291 os.mkdir(subdir) |
| 292 bucket_uri = self.CreateBucket() |
| 293 self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1') |
| 294 self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2') |
| 295 self.CreateTempFile(tmpdir=subdir, file_name='obj3', contents='subdir/obj3') |
| 296 self.CreateObject(bucket_uri=bucket_uri, object_name='obj2', |
| 297 contents='OBJ2') |
| 298 self.CreateObject(bucket_uri=bucket_uri, object_name='obj4', |
| 299 contents='obj4') |
| 300 self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj5', |
| 301 contents='subdir/obj5') |
| 302 |
| 303 # Need to make sure the bucket listing is caught up, otherwise the
| 304 # first rsync may not see obj2 and would then overwrite it.
| 305 self.AssertNObjectsInBucket(bucket_uri, 3) |
| 306 |
| 307 # Use @Retry as hedge against bucket listing eventual consistency. |
| 308 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 309 def _Check1(): |
| 310 """Tests rsync works as expected.""" |
| 311 self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)]) |
| 312 listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir)) |
| 313 listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri)) |
| 314 # Dir should have un-altered content. |
| 315 self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3'])) |
| 316 # Bucket should have content like dir but without the subdir objects |
| 317 # synchronized. |
| 318 self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5'])) |
| 319 # Assert that the src/dest objects that had same length but different |
| 320 # content were not synchronized (dir to bucket sync doesn't use checksums |
| 321 # unless you specify -c). |
| 322 with open(os.path.join(tmpdir, 'obj2')) as f: |
| 323 self.assertEquals('obj2', '\n'.join(f.readlines())) |
| 324 self.assertEquals('OBJ2', self.RunGsUtil( |
| 325 ['cat', suri(bucket_uri, 'obj2')], return_stdout=True)) |
| 326 _Check1() |
| 327 |
| 328 # Use @Retry as hedge against bucket listing eventual consistency. |
| 329 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 330 def _Check2(): |
| 331 # Check that re-running the same rsync command causes no more changes. |
| 332 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 333 ['rsync', '-d', tmpdir, suri(bucket_uri)], return_stderr=True)) |
| 334 _Check2() |
| 335 |
| 336 # Now rerun the sync with the -c option. |
| 337 # Use @Retry as hedge against bucket listing eventual consistency. |
| 338 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 339 def _Check3(): |
| 340 """Tests rsync -c works as expected.""" |
| 341 self.RunGsUtil(['rsync', '-d', '-c', tmpdir, suri(bucket_uri)]) |
| 342 listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir)) |
| 343 listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri)) |
| 344 # Dir should have un-altered content. |
| 345 self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3'])) |
| 346 # Bucket should have content like dir but without the subdir objects |
| 347 # synchronized. |
| 348 self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5'])) |
| 349 # Assert that the src/dest objects that had same length but different |
| 350 # content were synchronized (dir to bucket sync with -c uses checksums). |
| 351 with open(os.path.join(tmpdir, 'obj2')) as f: |
| 352 self.assertEquals('obj2', '\n'.join(f.readlines())) |
| 353 self.assertEquals('obj2', self.RunGsUtil( |
| 354 ['cat', suri(bucket_uri, 'obj2')], return_stdout=True)) |
| 355 _Check3() |
| 356 |
| 357 # Use @Retry as hedge against bucket listing eventual consistency. |
| 358 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 359 def _Check4(): |
| 360 # Check that re-running the same rsync command causes no more changes. |
| 361 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 362 ['rsync', '-d', '-c', tmpdir, suri(bucket_uri)], return_stderr=True)) |
| 363 _Check4() |
| 364 |
| 365 # Now add and remove some objects in dir and bucket and test rsync -d -r.
| 366 self.CreateTempFile(tmpdir=tmpdir, file_name='obj6', contents='obj6') |
| 367 self.CreateObject(bucket_uri=bucket_uri, object_name='obj7', |
| 368 contents='obj7') |
| 369 os.unlink(os.path.join(tmpdir, 'obj1')) |
| 370 self.RunGsUtil(['rm', suri(bucket_uri, 'obj2')]) |
| 371 |
| 372 # Use @Retry as hedge against bucket listing eventual consistency. |
| 373 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 374 def _Check5(): |
| 375 self.RunGsUtil(['rsync', '-d', '-r', tmpdir, suri(bucket_uri)]) |
| 376 listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir)) |
| 377 listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri)) |
| 378 # Dir should have un-altered content. |
| 379 self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3'])) |
| 380 # Bucket should now have content identical to dir, including the subdir
| 381 # objects (since -r was used).
| 382 self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3'])) |
| 383 _Check5() |
| 384 |
| 385 # Use @Retry as hedge against bucket listing eventual consistency. |
| 386 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 387 def _Check6(): |
| 388 # Check that re-running the same rsync command causes no more changes. |
| 389 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 390 ['rsync', '-d', '-r', tmpdir, suri(bucket_uri)], return_stderr=True)) |
| 391 _Check6() |
| 392 |
| 393 @unittest.skipUnless(UsingCrcmodExtension(crcmod), |
| 394 'Test requires fast crcmod.') |
| 395 def test_dir_to_dir_minus_d(self): |
| 396 """Tests that flat and recursive rsync dir to dir works correctly.""" |
| 397 # Create 2 dirs with 1 overlapping file, 1 extra file at root |
| 398 # level in each, and 1 extra file 1 level down in each. Make the |
| 399 # overlapping files named the same but with different content, to test |
| 400 # that we detect and properly copy in that case. |
| 401 tmpdir1 = self.CreateTempDir() |
| 402 tmpdir2 = self.CreateTempDir() |
| 403 subdir1 = os.path.join(tmpdir1, 'subdir1') |
| 404 subdir2 = os.path.join(tmpdir2, 'subdir2') |
| 405 os.mkdir(subdir1) |
| 406 os.mkdir(subdir2) |
| 407 self.CreateTempFile(tmpdir=tmpdir1, file_name='obj1', contents='obj1') |
| 408 self.CreateTempFile(tmpdir=tmpdir1, file_name='obj2', contents='obj2') |
| 409 self.CreateTempFile( |
| 410 tmpdir=subdir1, file_name='obj3', contents='subdir1/obj3') |
| 411 self.CreateTempFile(tmpdir=tmpdir2, file_name='obj2', contents='OBJ2') |
| 412 self.CreateTempFile(tmpdir=tmpdir2, file_name='obj4', contents='obj4') |
| 413 self.CreateTempFile( |
| 414 tmpdir=subdir2, file_name='obj5', contents='subdir2/obj5') |
| 415 |
| 416 self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2]) |
| 417 listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1)) |
| 418 listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2)) |
| 419 # dir1 should have un-altered content. |
| 420 self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir1/obj3'])) |
| 421 # dir2 should have content like dir1 but without the subdir1 objects |
| 422 # synchronized. |
| 423 self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir2/obj5'])) |
| 424 # Assert that the src/dest objects that had same length but different |
| 425 # checksums were not synchronized (dir to dir sync doesn't use checksums |
| 426 # unless you specify -c). |
| 427 with open(os.path.join(tmpdir1, 'obj2')) as f: |
| 428 self.assertEquals('obj2', '\n'.join(f.readlines())) |
| 429 with open(os.path.join(tmpdir2, 'obj2')) as f: |
| 430 self.assertEquals('OBJ2', '\n'.join(f.readlines())) |
| 431 |
| 432 # Use @Retry as hedge against bucket listing eventual consistency. |
| 433 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 434 def _Check1(): |
| 435 # Check that re-running the same rsync command causes no more changes. |
| 436 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 437 ['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True)) |
| 438 _Check1() |
| 439 |
| 440 # Now rerun the sync with the -c option. |
| 441 self.RunGsUtil(['rsync', '-d', '-c', tmpdir1, tmpdir2]) |
| 442 listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1)) |
| 443 listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2)) |
| 444 # dir1 should have un-altered content. |
| 445 self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir1/obj3'])) |
| 446 # dir2 should have content like dir1 but without the subdir1 objects
| 447 # synchronized.
| 448 self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir2/obj5'])) |
| 449 # Assert that the src/dest objects that had same length but different |
| 450 # content were synchronized (dir to dir sync with -c uses checksums). |
| 451 with open(os.path.join(tmpdir1, 'obj2')) as f: |
| 452 self.assertEquals('obj2', '\n'.join(f.readlines())) |
| 453 with open(os.path.join(tmpdir2, 'obj2')) as f:
| 454 self.assertEquals('obj2', '\n'.join(f.readlines())) |
| 455 |
| 456 # Use @Retry as hedge against bucket listing eventual consistency. |
| 457 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 458 def _Check2(): |
| 459 # Check that re-running the same rsync command causes no more changes. |
| 460 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 461 ['rsync', '-d', '-c', tmpdir1, tmpdir2], return_stderr=True)) |
| 462 _Check2() |
| 463 |
| 464 # Now add and remove some objects in both dirs and test rsync -d -r.
| 465 self.CreateTempFile(tmpdir=tmpdir1, file_name='obj6', contents='obj6') |
| 466 self.CreateTempFile(tmpdir=tmpdir2, file_name='obj7', contents='obj7') |
| 467 os.unlink(os.path.join(tmpdir1, 'obj1')) |
| 468 os.unlink(os.path.join(tmpdir2, 'obj2')) |
| 469 |
| 470 self.RunGsUtil(['rsync', '-d', '-r', tmpdir1, tmpdir2]) |
| 471 listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1)) |
| 472 listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2)) |
| 473 # dir1 should have un-altered content. |
| 474 self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir1/obj3'])) |
| 475 # dir2 should now have content identical to dir1, including the subdir1
| 476 # objects (since -r was used).
| 477 self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir1/obj3'])) |
| 478 |
| 479 # Use @Retry as hedge against bucket listing eventual consistency. |
| 480 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 481 def _Check3(): |
| 482 # Check that re-running the same rsync command causes no more changes. |
| 483 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 484 ['rsync', '-d', '-r', tmpdir1, tmpdir2], return_stderr=True)) |
| 485 _Check3() |
| 486 |
| 487 def test_dir_to_dir_minus_d_more_files_than_bufsize(self): |
| 488 """Tests concurrently building listing from multiple tmp file ranges.""" |
| 489 # Create 2 dirs, where each dir has 1000 objects and differing names. |
| 490 tmpdir1 = self.CreateTempDir() |
| 491 tmpdir2 = self.CreateTempDir() |
| 492 for i in range(0, 1000): |
| 493 self.CreateTempFile(tmpdir=tmpdir1, file_name='d1-%s' % i, contents='x')
| 494 self.CreateTempFile(tmpdir=tmpdir2, file_name='d2-%s' % i, contents='y')
| 495 |
| 496 # We open a new temp file each time we reach rsync_buffer_lines of |
| 497 # listing output. On Windows, this will result in a 'too many open file |
| 498 # handles' error, so choose a larger value so as not to open so many files. |
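| # (With 1000 files per dir and rsync_buffer_lines=2 on non-Windows, the
| # listing spills across roughly 500 temp files, which is what exercises the
| # multi-temp-file code path this test targets.)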
| 499 rsync_buffer_config = [('GSUtil', 'rsync_buffer_lines', |
| 500 '50' if IS_WINDOWS else '2')] |
| 501 # Run gsutil with config option to make buffer size << # files. |
| 502 with SetBotoConfigForTest(rsync_buffer_config): |
| 503 self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2]) |
| 504 listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1)) |
| 505 listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2)) |
| 506 self.assertEquals(listing1, listing2) |
| 507 |
| 508 # Use @Retry as hedge against bucket listing eventual consistency. |
| 509 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 510 def _Check(): |
| 511 # Check that re-running the same rsync command causes no more changes. |
| 512 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 513 ['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True)) |
| 514 _Check() |
| 515 |
| 516 @unittest.skipUnless(UsingCrcmodExtension(crcmod), |
| 517 'Test requires fast crcmod.') |
| 518 def test_bucket_to_dir_minus_d(self): |
| 519 """Tests that flat and recursive rsync bucket to dir works correctly.""" |
| 520 # Create bucket and dir with 1 overlapping object, 1 extra object at root |
| 521 # level in each, and 1 extra object 1 level down in each. Make the |
| 522 # overlapping objects named the same but with different content, to test |
| 523 # that we detect and properly copy in that case. |
| 524 bucket_uri = self.CreateBucket() |
| 525 tmpdir = self.CreateTempDir() |
| 526 subdir = os.path.join(tmpdir, 'subdir') |
| 527 os.mkdir(subdir) |
| 528 self.CreateObject(bucket_uri=bucket_uri, object_name='obj1', |
| 529 contents='obj1') |
| 530 self.CreateObject(bucket_uri=bucket_uri, object_name='obj2', |
| 531 contents='obj2') |
| 532 self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj3', |
| 533 contents='subdir/obj3') |
| 534 self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='OBJ2') |
| 535 self.CreateTempFile(tmpdir=tmpdir, file_name='obj4', contents='obj4') |
| 536 self.CreateTempFile(tmpdir=subdir, file_name='obj5', contents='subdir/obj5') |
| 537 |
| 538 # Use @Retry as hedge against bucket listing eventual consistency. |
| 539 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 540 def _Check1(): |
| 541 """Tests rsync works as expected.""" |
| 542 self.RunGsUtil(['rsync', '-d', suri(bucket_uri), tmpdir]) |
| 543 listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri)) |
| 544 listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir)) |
| 545 # Bucket should have un-altered content. |
| 546 self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3'])) |
| 547 # Dir should have content like bucket but without the subdir objects |
| 548 # synchronized. |
| 549 self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5'])) |
| 550 # Assert that the src/dest objects that had same length but different |
| 551 # content were not synchronized (bucket to dir sync doesn't use checksums |
| 552 # unless you specify -c). |
| 553 self.assertEquals('obj2', self.RunGsUtil( |
| 554 ['cat', suri(bucket_uri, 'obj2')], return_stdout=True)) |
| 555 with open(os.path.join(tmpdir, 'obj2')) as f: |
| 556 self.assertEquals('OBJ2', '\n'.join(f.readlines())) |
| 557 _Check1() |
| 558 |
| 559 # Use @Retry as hedge against bucket listing eventual consistency. |
| 560 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 561 def _Check2(): |
| 562 # Check that re-running the same rsync command causes no more changes. |
| 563 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 564 ['rsync', '-d', suri(bucket_uri), tmpdir], return_stderr=True)) |
| 565 _Check2() |
| 566 |
| 567 # Now rerun the sync with the -c option. |
| 568 # Use @Retry as hedge against bucket listing eventual consistency. |
| 569 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 570 def _Check3(): |
| 571 """Tests rsync -c works as expected.""" |
| 572 self.RunGsUtil(['rsync', '-d', '-c', suri(bucket_uri), tmpdir]) |
| 573 listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri)) |
| 574 listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir)) |
| 575 # Bucket should have un-altered content. |
| 576 self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3'])) |
| 577 # Dir should have content like bucket but without the subdir objects |
| 578 # synchronized. |
| 579 self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5'])) |
| 580 # Assert that the src/dest objects that had same length but different |
| 581 # content were synchronized (bucket to dir sync with -c uses checksums). |
| 582 self.assertEquals('obj2', self.RunGsUtil( |
| 583 ['cat', suri(bucket_uri, 'obj2')], return_stdout=True)) |
| 584 with open(os.path.join(tmpdir, 'obj2')) as f: |
| 585 self.assertEquals('obj2', '\n'.join(f.readlines())) |
| 586 _Check3() |
| 587 |
| 588 # Use @Retry as hedge against bucket listing eventual consistency. |
| 589 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 590 def _Check4(): |
| 591 # Check that re-running the same rsync command causes no more changes. |
| 592 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 593 ['rsync', '-d', '-c', suri(bucket_uri), tmpdir], return_stderr=True)) |
| 594 _Check4() |
| 595 |
| 596 # Now add and remove some objects in dir and bucket and test rsync -d -r.
| 597 self.CreateObject(bucket_uri=bucket_uri, object_name='obj6', |
| 598 contents='obj6') |
| 599 self.CreateTempFile(tmpdir=tmpdir, file_name='obj7', contents='obj7') |
| 600 self.RunGsUtil(['rm', suri(bucket_uri, 'obj1')]) |
| 601 os.unlink(os.path.join(tmpdir, 'obj2')) |
| 602 |
| 603 # Use @Retry as hedge against bucket listing eventual consistency. |
| 604 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 605 def _Check5(): |
| 606 self.RunGsUtil(['rsync', '-d', '-r', suri(bucket_uri), tmpdir]) |
| 607 listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri)) |
| 608 listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir)) |
| 609 # Bucket should have un-altered content. |
| 610 self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3'])) |
| 611 # Dir should now have content identical to bucket, including the subdir
| 612 # objects (since -r was used).
| 613 self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3'])) |
| 614 _Check5() |
| 615 |
| 616 # Use @Retry as hedge against bucket listing eventual consistency. |
| 617 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 618 def _Check6(): |
| 619 # Check that re-running the same rsync command causes no more changes. |
| 620 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 621 ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True)) |
| 622 _Check6() |
| 623 |
| 624 def test_bucket_to_dir_minus_d_with_fname_case_change(self): |
| 625 """Tests that name case changes work correctly. |
| 626 |
| 627 Example: |
| 628 |
| 629 Windows filenames are case-preserving (they keep the case you wrote) but
| 630 case-insensitive when compared. If you sync from FS to cloud and then
| 631 change the case of local filenames, you could end up with this situation:
| 632 |
| 633 Cloud copy is called .../TiVo/... |
| 634 FS copy is called .../Tivo/... |
| 635 |
| 636 Then, if you sync from cloud to FS, if rsync doesn't recognize that on |
| 637 Windows these names are identical, each rsync run will cause both a copy |
| 638 and a delete to be executed. |
| 639 """ |
| 640 # Create bucket and dir with same objects, but dir copy has different name |
| 641 # case. |
| 642 bucket_uri = self.CreateBucket() |
| 643 tmpdir = self.CreateTempDir() |
| 644 self.CreateObject(bucket_uri=bucket_uri, object_name='obj1', |
| 645 contents='obj1') |
| 646 self.CreateTempFile(tmpdir=tmpdir, file_name='Obj1', contents='obj1') |
| 647 |
| 648 # Use @Retry as hedge against bucket listing eventual consistency. |
| 649 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 650 def _Check1(): |
| 651 """Tests rsync works as expected.""" |
| 652 output = self.RunGsUtil( |
| 653 ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True) |
| 654 # Nothing should be copied or removed under Windows. |
| 655 if IS_WINDOWS: |
| 656 self.assertEquals(NO_CHANGES, output) |
| 657 else: |
| 658 self.assertNotEquals(NO_CHANGES, output) |
| 659 _Check1() |
| 660 |
| 661 def test_bucket_to_dir_minus_d_with_leftover_dir_placeholder(self): |
| 662 """Tests that we correctly handle leftover dir placeholders. |
| 663 |
| 664 See comments in gslib.commands.rsync._FieldedListingIterator for details. |
| 665 """ |
| 666 bucket_uri = self.CreateBucket() |
| 667 tmpdir = self.CreateTempDir() |
| 668 self.CreateObject(bucket_uri=bucket_uri, object_name='obj1', |
| 669 contents='obj1') |
| 670 # Create a placeholder like what can be left over by web GUI tools. |
| 671 key_uri = bucket_uri.clone_replace_name('/') |
| 672 key_uri.set_contents_from_string('') |
| 673 |
| 674 # Use @Retry as hedge against bucket listing eventual consistency. |
| 675 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 676 def _Check1(): |
| 677 """Tests rsync works as expected.""" |
| 678 output = self.RunGsUtil( |
| 679 ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True) |
| 680 listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri)) |
| 681 listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir)) |
| 682 # Bucket should have un-altered content. |
| 683 self.assertEquals(listing1, set(['/obj1', '//'])) |
| 684 # Dir should not have the placeholder object.
| 685 self.assertEquals(listing2, set(['/obj1'])) |
| 686 # Stderr should report what happened.
| 687 self.assertRegexpMatches(output, r'.*Skipping cloud sub-directory.*') |
| 688 _Check1() |
| 689 |
| 690 @unittest.skipIf(IS_WINDOWS, 'os.symlink() is not available on Windows.') |
| 691 def test_rsync_minus_d_minus_e(self): |
| 692 """Tests that rsync -e ignores symlinks.""" |
| 693 tmpdir = self.CreateTempDir() |
| 694 subdir = os.path.join(tmpdir, 'subdir') |
| 695 os.mkdir(subdir) |
| 696 bucket_uri = self.CreateBucket() |
| 697 fpath1 = self.CreateTempFile( |
| 698 tmpdir=tmpdir, file_name='obj1', contents='obj1') |
| 699 self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2') |
| 700 self.CreateTempFile(tmpdir=subdir, file_name='obj3', contents='subdir/obj3') |
| 701 good_symlink_path = os.path.join(tmpdir, 'symlink1') |
| 702 os.symlink(fpath1, good_symlink_path) |
| 703 # Make a symlink that points to a non-existent path to test that -e also |
| 704 # handles that case. |
| 705 bad_symlink_path = os.path.join(tmpdir, 'symlink2') |
| 706 os.symlink(os.path.join('/', 'non-existent'), bad_symlink_path) |
| 707 self.CreateObject(bucket_uri=bucket_uri, object_name='obj2', |
| 708 contents='OBJ2') |
| 709 self.CreateObject(bucket_uri=bucket_uri, object_name='obj4', |
| 710 contents='obj4') |
| 711 self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj5', |
| 712 contents='subdir/obj5') |
| 713 |
| 714 # Use @Retry as hedge against bucket listing eventual consistency. |
| 715 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 716 def _Check1(): |
| 717 """Ensure listings match the commented expectations.""" |
| 718 self.RunGsUtil(['rsync', '-d', '-e', tmpdir, suri(bucket_uri)]) |
| 719 listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir)) |
| 720 listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri)) |
| 721 # Dir should have un-altered content. |
| 722 self.assertEquals( |
| 723 listing1, |
| 724 set(['/obj1', '/obj2', '/subdir/obj3', '/symlink1', '/symlink2'])) |
| 725 # Bucket should have content like dir but without the symlink, and |
| 726 # without subdir objects synchronized. |
| 727 self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5'])) |
| 728 _Check1() |
| 729 |
| 730 # Now remove invalid symlink and run without -e, and see that symlink gets |
| 731 # copied (as file to which it points). Use @Retry as hedge against bucket |
| 732 # listing eventual consistency. |
| 733 os.unlink(bad_symlink_path) |
| 734 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 735 def _Check2(): |
| 736 """Tests rsync works as expected.""" |
| 737 self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)]) |
| 738 listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir)) |
| 739 listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri)) |
| 740 # Dir should have un-altered content. |
| 741 self.assertEquals( |
| 742 listing1, set(['/obj1', '/obj2', '/subdir/obj3', '/symlink1'])) |
| 743 # Bucket should now also have the symlink's target copied as a file,
| 744 # still without the subdir objects synchronized.
| 745 self.assertEquals( |
| 746 listing2, set(['/obj1', '/obj2', '/subdir/obj5', '/symlink1'])) |
| 747 self.assertEquals('obj1', self.RunGsUtil( |
| 748 ['cat', suri(bucket_uri, 'symlink1')], return_stdout=True)) |
| 749 _Check2() |
| 750 |
| 751 # Use @Retry as hedge against bucket listing eventual consistency. |
| 752 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 753 def _Check3(): |
| 754 # Check that re-running the same rsync command causes no more changes. |
| 755 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 756 ['rsync', '-d', tmpdir, suri(bucket_uri)], return_stderr=True)) |
| 757 _Check3() |
| 758 |
| 759 @SkipForS3('S3 does not support composite objects') |
| 760 def test_bucket_to_bucket_minus_d_with_composites(self): |
| 761 """Tests that rsync works with composite objects (which don't have MD5s).""" |
| 762 bucket1_uri = self.CreateBucket() |
| 763 bucket2_uri = self.CreateBucket() |
| 764 self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1', |
| 765 contents='obj1') |
| 766 self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2', |
| 767 contents='obj2') |
| 768 self.RunGsUtil( |
| 769 ['compose', suri(bucket1_uri, 'obj1'), suri(bucket1_uri, 'obj2'), |
| 770 suri(bucket1_uri, 'obj3')]) |
| 771 self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2', |
| 772 contents='OBJ2') |
| 773 self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4', |
| 774 contents='obj4') |
| 775 |
| 776 # Use @Retry as hedge against bucket listing eventual consistency. |
| 777 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 778 def _Check1(): |
| 779 self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)]) |
| 780 listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri)) |
| 781 listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri)) |
| 782 # First bucket should have un-altered content. |
| 783 self.assertEquals(listing1, set(['/obj1', '/obj2', '/obj3'])) |
| 784 # Second bucket should now have content identical to the first bucket
| 785 # (there are no subdir objects in this test).
| 786 self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj3'])) |
| 787 _Check1() |
| 788 |
| 789 # Use @Retry as hedge against bucket listing eventual consistency. |
| 790 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 791 def _Check2(): |
| 792 # Check that re-running the same rsync command causes no more changes. |
| 793 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 794 ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)], |
| 795 return_stderr=True)) |
| 796 _Check2() |
| 797 |
| 798 def test_bucket_to_bucket_minus_d_empty_dest(self): |
| 799 """Tests working with empty dest bucket (iter runs out before src iter).""" |
| 800 bucket1_uri = self.CreateBucket() |
| 801 bucket2_uri = self.CreateBucket() |
| 802 self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1', |
| 803 contents='obj1') |
| 804 self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2', |
| 805 contents='obj2') |
| 806 |
| 807 # Use @Retry as hedge against bucket listing eventual consistency. |
| 808 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 809 def _Check1(): |
| 810 self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)]) |
| 811 listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri)) |
| 812 listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri)) |
| 813 self.assertEquals(listing1, set(['/obj1', '/obj2'])) |
| 814 self.assertEquals(listing2, set(['/obj1', '/obj2'])) |
| 815 _Check1() |
| 816 |
| 817 # Use @Retry as hedge against bucket listing eventual consistency. |
| 818 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 819 def _Check2(): |
| 820 # Check that re-running the same rsync command causes no more changes. |
| 821 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 822 ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)], |
| 823 return_stderr=True)) |
| 824 _Check2() |
| 825 |
| 826 def test_bucket_to_bucket_minus_d_empty_src(self): |
| 827 """Tests working with empty src bucket (iter runs out before dst iter).""" |
| 828 bucket1_uri = self.CreateBucket() |
| 829 bucket2_uri = self.CreateBucket() |
| 830 self.CreateObject(bucket_uri=bucket2_uri, object_name='obj1', |
| 831 contents='obj1') |
| 832 self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2', |
| 833 contents='obj2') |
| 834 |
| 835 # Use @Retry as hedge against bucket listing eventual consistency. |
| 836 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 837 def _Check1(): |
| 838 self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)]) |
| 839 stderr = self.RunGsUtil(['ls', suri(bucket1_uri, '**')], |
| 840 expected_status=1, return_stderr=True) |
| 841 self.assertIn('One or more URLs matched no objects', stderr) |
| 842 stderr = self.RunGsUtil(['ls', suri(bucket2_uri, '**')], |
| 843 expected_status=1, return_stderr=True) |
| 844 self.assertIn('One or more URLs matched no objects', stderr) |
| 845 _Check1() |
| 846 |
| 847 # Use @Retry as hedge against bucket listing eventual consistency. |
| 848 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 849 def _Check2(): |
| 850 # Check that re-running the same rsync command causes no more changes. |
| 851 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 852 ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)], |
| 853 return_stderr=True)) |
| 854 _Check2() |
| 855 |
| 856 def test_rsync_minus_d_minus_p(self): |
| 857 """Tests that rsync -p preserves ACLs.""" |
| 858 bucket1_uri = self.CreateBucket() |
| 859 bucket2_uri = self.CreateBucket() |
| 860 self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1', |
| 861 contents='obj1') |
| 862 # Set public-read (non-default) ACL so we can verify that rsync -p works. |
| 863 self.RunGsUtil(['acl', 'set', 'public-read', suri(bucket1_uri, 'obj1')]) |
| 864 |
| 865 # Use @Retry as hedge against bucket listing eventual consistency. |
| 866 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 867 def _Check1(): |
| 868 """Tests rsync -p works as expected.""" |
| 869 self.RunGsUtil(['rsync', '-d', '-p', suri(bucket1_uri), |
| 870 suri(bucket2_uri)]) |
| 871 listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri)) |
| 872 listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri)) |
| 873 self.assertEquals(listing1, set(['/obj1'])) |
| 874 self.assertEquals(listing2, set(['/obj1'])) |
| 875 acl1_json = self.RunGsUtil(['acl', 'get', suri(bucket1_uri, 'obj1')], |
| 876 return_stdout=True) |
| 877 acl2_json = self.RunGsUtil(['acl', 'get', suri(bucket2_uri, 'obj1')], |
| 878 return_stdout=True) |
| 879 self.assertEquals(acl1_json, acl2_json) |
| 880 _Check1() |
| 881 |
| 882 # Use @Retry as hedge against bucket listing eventual consistency. |
| 883 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 884 def _Check2(): |
| 885 # Check that re-running the same rsync command causes no more changes. |
| 886 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 887 ['rsync', '-d', '-p', suri(bucket1_uri), suri(bucket2_uri)], |
| 888 return_stderr=True)) |
| 889 _Check2() |
| 890 |
| 891 def test_rsync_to_nonexistent_bucket_subdir(self): |
| 892 """Tests that rsync to non-existent bucket subdir works.""" |
| 893 # Create dir with some objects and empty bucket. |
| 894 tmpdir = self.CreateTempDir() |
| 895 subdir = os.path.join(tmpdir, 'subdir') |
| 896 os.mkdir(subdir) |
| 897 bucket_url = self.CreateBucket() |
| 898 self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1') |
| 899 self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2') |
| 900 self.CreateTempFile(tmpdir=subdir, file_name='obj3', contents='subdir/obj3') |
| 901 |
| 902 # Use @Retry as hedge against bucket listing eventual consistency. |
| 903 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 904 def _Check1(): |
| 905 """Tests rsync works as expected.""" |
| 906 self.RunGsUtil(['rsync', '-r', tmpdir, suri(bucket_url, 'subdir')]) |
| 907 listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir)) |
| 908 listing2 = _TailSet( |
| 909 suri(bucket_url, 'subdir'), |
| 910 self._FlatListBucket(bucket_url.clone_replace_name('subdir'))) |
| 911 # Dir should have un-altered content. |
| 912 self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3'])) |
| 913 # Bucket subdir should have content like dir. |
| 914 self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj3'])) |
| 915 _Check1() |
| 916 |
| 917 # Use @Retry as hedge against bucket listing eventual consistency. |
| 918 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 919 def _Check2(): |
| 920 # Check that re-running the same rsync command causes no more changes. |
| 921 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 922 ['rsync', '-r', tmpdir, suri(bucket_url, 'subdir')], |
| 923 return_stderr=True)) |
| 924 _Check2() |
| 925 |
| 926 def test_rsync_from_nonexistent_bucket(self): |
| 927 """Tests that rsync from a non-existent bucket subdir fails gracefully.""" |
| 928 tmpdir = self.CreateTempDir() |
| 929 self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1') |
| 930 self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2') |
| 931 bucket_url_str = '%s://%s' % ( |
| 932 self.default_provider, self.nonexistent_bucket_name) |
| 933 stderr = self.RunGsUtil(['rsync', '-d', bucket_url_str, tmpdir], |
| 934 expected_status=1, return_stderr=True) |
| 935 self.assertIn('Caught non-retryable exception', stderr) |
| 936 listing = _TailSet(tmpdir, self._FlatListDir(tmpdir)) |
| 937 # Dir should have un-altered content. |
| 938 self.assertEquals(listing, set(['/obj1', '/obj2'])) |
| 939 |
| 940 def test_rsync_to_nonexistent_bucket(self): |
| 941 """Tests that rsync from a non-existent bucket subdir fails gracefully.""" |
| 942 tmpdir = self.CreateTempDir() |
| 943 self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1') |
| 944 self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2') |
| 945 bucket_url_str = '%s://%s' % ( |
| 946 self.default_provider, self.nonexistent_bucket_name) |
| 947 stderr = self.RunGsUtil(['rsync', '-d', tmpdir, bucket_url_str],
| 948 expected_status=1, return_stderr=True) |
| 949 self.assertIn('Caught non-retryable exception', stderr) |
| 950 listing = _TailSet(tmpdir, self._FlatListDir(tmpdir)) |
| 951 # Dir should have un-altered content. |
| 952 self.assertEquals(listing, set(['/obj1', '/obj2'])) |
| 953 |
| 954 def test_bucket_to_bucket_minus_d_with_overwrite_and_punc_chars(self): |
| 955 """Tests that punc chars in filenames don't confuse sort order.""" |
| 956 bucket1_uri = self.CreateBucket() |
| 957 bucket2_uri = self.CreateBucket() |
| 958 # Create 2 objects in each bucket, with one overwritten with a name that's |
| 959 # less than the next name in destination bucket when encoded, but not when |
| 960 # compared without encoding. |
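| # (Concretely: unencoded, 'e-1/obj2' < 'e/obj1' because ord('-') == 45 <
| # ord('/') == 47; URL-encoded ('/' -> '%2F'), 'e%2Fobj1' < 'e-1%2Fobj2'
| # because ord('%') == 37 < ord('-') == 45, so the two orderings disagree.)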
| 961 self.CreateObject(bucket_uri=bucket1_uri, object_name='e/obj1', |
| 962 contents='obj1') |
| 963 self.CreateObject(bucket_uri=bucket1_uri, object_name='e-1/obj2', |
| 964 contents='obj2') |
| 965 self.CreateObject(bucket_uri=bucket2_uri, object_name='e/obj1', |
| 966 contents='OBJ1') |
| 967 self.CreateObject(bucket_uri=bucket2_uri, object_name='e-1/obj2', |
| 968 contents='obj2') |
| 969 # Need to make sure the bucket listings are caught up, otherwise the
| 970 # rsync may not see all objects and could fail to synchronize correctly.
| 971 self.AssertNObjectsInBucket(bucket1_uri, 2) |
| 972 self.AssertNObjectsInBucket(bucket2_uri, 2) |
| 973 |
| 974 # Use @Retry as hedge against bucket listing eventual consistency. |
| 975 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 976 def _Check1(): |
| 977 """Tests rsync works as expected.""" |
| 978 self.RunGsUtil(['rsync', '-rd', suri(bucket1_uri), suri(bucket2_uri)]) |
| 979 listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri)) |
| 980 listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri)) |
| 981 # First bucket should have un-altered content. |
| 982 self.assertEquals(listing1, set(['/e/obj1', '/e-1/obj2'])) |
| 983 self.assertEquals(listing2, set(['/e/obj1', '/e-1/obj2'])) |
| 984 # Assert correct contents. |
| 985 self.assertEquals('obj1', self.RunGsUtil( |
| 986 ['cat', suri(bucket2_uri, 'e/obj1')], return_stdout=True)) |
| 987 self.assertEquals('obj2', self.RunGsUtil( |
| 988 ['cat', suri(bucket2_uri, 'e-1/obj2')], return_stdout=True)) |
| 989 _Check1() |
| 990 |
| 991 # Use @Retry as hedge against bucket listing eventual consistency. |
| 992 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 993 def _Check2(): |
| 994 # Check that re-running the same rsync command causes no more changes. |
| 995 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 996 ['rsync', '-rd', suri(bucket1_uri), suri(bucket2_uri)],
| 997 return_stderr=True)) |
| 998 _Check2() |
| 999 |
| 1000 def test_dir_to_bucket_minus_x(self): |
| 1001 """Tests that rsync -x option works correctly.""" |
| 1002 # Create dir and bucket with 1 overlapping and 2 extra objects in each. |
| 1003 tmpdir = self.CreateTempDir() |
| 1004 bucket_uri = self.CreateBucket() |
| 1005 self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1') |
| 1006 self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2') |
| 1007 self.CreateTempFile(tmpdir=tmpdir, file_name='obj3', contents='obj3') |
| 1008 self.CreateObject(bucket_uri=bucket_uri, object_name='obj2', |
| 1009 contents='obj2') |
| 1010 self.CreateObject(bucket_uri=bucket_uri, object_name='obj4', |
| 1011 contents='obj4') |
| 1012 self.CreateObject(bucket_uri=bucket_uri, object_name='obj5', |
| 1013 contents='obj5') |
| 1014 |
| 1015 # Need to make sure the bucket listing is caught-up, otherwise the |
| 1016 # first rsync may not see obj2 and overwrite it. |
| 1017 self.AssertNObjectsInBucket(bucket_uri, 3) |
| 1018 |
| 1019 # Use @Retry as hedge against bucket listing eventual consistency. |
| 1020 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 1021 def _Check1(): |
| 1022 """Tests rsync works as expected.""" |
| 1023 self.RunGsUtil(['rsync', '-d', '-x', 'obj[34]', tmpdir, suri(bucket_uri)]) |
| 1024 listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir)) |
| 1025 listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri)) |
| 1026 # Dir should have un-altered content. |
| 1027 self.assertEquals(listing1, set(['/obj1', '/obj2', '/obj3'])) |
| 1028 # Bucket should have content like dir but ignoring obj3 from dir and not |
| 1029 # deleting obj4 from bucket (per exclude regex). |
| 1030 self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj4'])) |
| 1031 _Check1() |
| 1032 |
| 1033 # Use @Retry as hedge against bucket listing eventual consistency. |
| 1034 @Retry(AssertionError, tries=3, timeout_secs=1) |
| 1035 def _Check2(): |
| 1036 # Check that re-running the same rsync command causes no more changes. |
| 1037 self.assertEquals(NO_CHANGES, self.RunGsUtil( |
| 1038 ['rsync', '-d', '-x', 'obj[34]', tmpdir, suri(bucket_uri)], |
| 1039 return_stderr=True)) |
| 1040 _Check2() |