| OLD | NEW |
| 1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
| 2 # Copyright 2014 Google Inc. All Rights Reserved. | 2 # Copyright 2014 Google Inc. All Rights Reserved. |
| 3 # | 3 # |
| 4 # Licensed under the Apache License, Version 2.0 (the "License"); | 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 # you may not use this file except in compliance with the License. | 5 # you may not use this file except in compliance with the License. |
| 6 # You may obtain a copy of the License at | 6 # You may obtain a copy of the License at |
| 7 # | 7 # |
| 8 # http://www.apache.org/licenses/LICENSE-2.0 | 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 # | 9 # |
| 10 # Unless required by applicable law or agreed to in writing, software | 10 # Unless required by applicable law or agreed to in writing, software |
| (...skipping 13 matching lines...) Expand all Loading... |
| 24 import re | 24 import re |
| 25 import tempfile | 25 import tempfile |
| 26 import textwrap | 26 import textwrap |
| 27 import traceback | 27 import traceback |
| 28 import urllib | 28 import urllib |
| 29 | 29 |
| 30 from boto import config | 30 from boto import config |
| 31 import crcmod | 31 import crcmod |
| 32 | 32 |
| 33 from gslib import copy_helper | 33 from gslib import copy_helper |
| 34 from gslib.bucket_listing_ref import BucketListingObject |
| 34 from gslib.cloud_api import NotFoundException | 35 from gslib.cloud_api import NotFoundException |
| 35 from gslib.command import Command | 36 from gslib.command import Command |
| 36 from gslib.command import DummyArgChecker | 37 from gslib.command import DummyArgChecker |
| 37 from gslib.command_argument import CommandArgument | 38 from gslib.command_argument import CommandArgument |
| 38 from gslib.copy_helper import CreateCopyHelperOpts | 39 from gslib.copy_helper import CreateCopyHelperOpts |
| 39 from gslib.copy_helper import SkipUnsupportedObjectError | 40 from gslib.copy_helper import SkipUnsupportedObjectError |
| 40 from gslib.cs_api_map import ApiSelector | 41 from gslib.cs_api_map import ApiSelector |
| 41 from gslib.exception import CommandException | 42 from gslib.exception import CommandException |
| 42 from gslib.hashing_helper import CalculateB64EncodedCrc32cFromContents | 43 from gslib.hashing_helper import CalculateB64EncodedCrc32cFromContents |
| 43 from gslib.hashing_helper import CalculateB64EncodedMd5FromContents | 44 from gslib.hashing_helper import CalculateB64EncodedMd5FromContents |
| (...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 246 you are running a Python library for computing CRC32C, which is much slower | 247 you are running a Python library for computing CRC32C, which is much slower |
| 247 than using the compiled code. For information on getting a compiled CRC32C | 248 than using the compiled code. For information on getting a compiled CRC32C |
| 248 implementation, see 'gsutil help crc32c'. | 249 implementation, see 'gsutil help crc32c'. |
| 249 | 250 |
| 250 | 251 |
| 251 <B>LIMITATIONS</B> | 252 <B>LIMITATIONS</B> |
| 252 1. The gsutil rsync command doesn't make the destination object's timestamps | 253 1. The gsutil rsync command doesn't make the destination object's timestamps |
| 253 match those of the source object (it can't; timestamp setting is not | 254 match those of the source object (it can't; timestamp setting is not |
| 254 allowed by the GCS API). | 255 allowed by the GCS API). |
| 255 | 256 |
| 256 2. The gsutil rsync command ignores versioning, synchronizing only the live | 257 2. The gsutil rsync command considers only the current object generations in |
| 257 object versions in versioned buckets. | 258 the source and destination buckets when deciding what to copy / delete. If |
| 259 versioning is enabled in the destination bucket then gsutil rsync's |
| 260 overwriting or deleting objects will end up creating versions, but the |
| 261 command doesn't try to make the archived generations match in the source |
| 262 and destination buckets. |
| 263 |
| 258 | 264 |
| 259 | 265 |
| 260 <B>OPTIONS</B> | 266 <B>OPTIONS</B> |
| 261 -c Causes the rsync command to compute checksums for files if the | 267 -c Causes the rsync command to compute checksums for files if the |
| 262 size of source and destination match, and then compare | 268 size of source and destination match, and then compare |
| 263 checksums. This option increases local disk I/O and run time | 269 checksums. This option increases local disk I/O and run time |
| 264 if either src_url or dst_url are on the local file system. | 270 if either src_url or dst_url are on the local file system. |
| 265 | 271 |
| 266 -C If an error occurs, continue to attempt to copy the remaining | 272 -C If an error occurs, continue to attempt to copy the remaining |
| 267 files. If errors occurred, gsutil's exit status will be non-zero | 273 files. If errors occurred, gsutil's exit status will be non-zero |
| (...skipping 28 matching lines...) Expand all Loading... |
| 296 rsync -p if you want all objects in the destination bucket to | 302 rsync -p if you want all objects in the destination bucket to |
| 297 end up with the same ACL by setting a default object ACL on that | 303 end up with the same ACL by setting a default object ACL on that |
| 298 bucket instead of using rsync -p. See 'help gsutil defacl'. | 304 bucket instead of using rsync -p. See 'help gsutil defacl'. |
| 299 | 305 |
| 300 -R, -r Causes directories, buckets, and bucket subdirectories to be | 306 -R, -r Causes directories, buckets, and bucket subdirectories to be |
| 301 synchronized recursively. If you neglect to use this option | 307 synchronized recursively. If you neglect to use this option |
| 302 gsutil will make only the top-level directory in the source | 308 gsutil will make only the top-level directory in the source |
| 303 and destination URLs match, skipping any sub-directories. | 309 and destination URLs match, skipping any sub-directories. |
| 304 | 310 |
| 305 -U Skip objects with unsupported object types instead of failing. | 311 -U Skip objects with unsupported object types instead of failing. |
| 306 Unsupported object types are s3 glacier objects. | 312 Unsupported object types are Amazon S3 Objects in the GLACIER |
| 313 storage class. |
| 307 | 314 |
| 308 -x pattern Causes files/objects matching pattern to be excluded, i.e., any | 315 -x pattern Causes files/objects matching pattern to be excluded, i.e., any |
| 309 matching files/objects will not be copied or deleted. Note that | 316 matching files/objects will not be copied or deleted. Note that |
| 310 the pattern is a Python regular expression, not a wildcard (so, | 317 the pattern is a Python regular expression, not a wildcard (so, |
| 311 matching any string ending in 'abc' would be specified using | 318 matching any string ending in 'abc' would be specified using |
| 312 '.*abc' rather than '*abc'). Note also that the exclude path is | 319 '.*abc' rather than '*abc'). Note also that the exclude path is |
| 313 always relative (similar to Unix rsync or tar exclude options). | 320 always relative (similar to Unix rsync or tar exclude options). |
| 314 For example, if you run the command: | 321 For example, if you run the command: |
| 315 | 322 |
| 316 gsutil rsync -x 'data./.*\\.txt' dir gs://my-bucket | 323 gsutil rsync -x 'data./.*\\.txt' dir gs://my-bucket |
| (...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 465 # futile or could result in data loss - for example: | 472 # futile or could result in data loss - for example: |
| 466 # gsutil rsync -d gs://non-existent-bucket ./localdir | 473 # gsutil rsync -d gs://non-existent-bucket ./localdir |
| 467 # would delete files from localdir. | 474 # would delete files from localdir. |
| 468 cls.logger.error( | 475 cls.logger.error( |
| 469 'Caught non-retryable exception while listing %s: %s' % | 476 'Caught non-retryable exception while listing %s: %s' % |
| 470 (base_url_str, e)) | 477 (base_url_str, e)) |
| 471 cls.non_retryable_listing_failures = 1 | 478 cls.non_retryable_listing_failures = 1 |
| 472 out_file.close() | 479 out_file.close() |
| 473 | 480 |
| 474 | 481 |
def _LocalDirIterator(base_url):
  """Generates a BLR for every file located directly in a local directory.

  This is used in place of WildcardIterator when a local directory is listed
  without recursion: the glob-based matching that WildcardIterator relies on
  skips "dot" files, and those must not be skipped when synchronizing to or
  from a local directory.

  Args:
    base_url: URL for the directory over which to iterate.

  Yields:
    BucketListingObject for each file in the directory.
  """
  dir_path = base_url.object_name
  for entry in os.listdir(dir_path):
    candidate = os.path.join(dir_path, entry)
    # Anything that os.path.isfile rejects (e.g. a sub-directory) is left out;
    # this iterator only serves the non-recursive listing case.
    if not os.path.isfile(candidate):
      continue
    yield BucketListingObject(StorageUrlFromString(candidate), None)
| 500 |
| 501 |
| 475 def _FieldedListingIterator(cls, gsutil_api, base_url_str, desc): | 502 def _FieldedListingIterator(cls, gsutil_api, base_url_str, desc): |
| 476 """Iterator over base_url_str formatting output per _BuildTmpOutputLine. | 503 """Iterator over base_url_str formatting output per _BuildTmpOutputLine. |
| 477 | 504 |
| 478 Args: | 505 Args: |
| 479 cls: Command instance. | 506 cls: Command instance. |
| 480 gsutil_api: gsutil Cloud API instance to use for bucket listing. | 507 gsutil_api: gsutil Cloud API instance to use for bucket listing. |
| 481 base_url_str: The top-level URL string over which to iterate. | 508 base_url_str: The top-level URL string over which to iterate. |
| 482 desc: 'source' or 'destination'. | 509 desc: 'source' or 'destination'. |
| 483 | 510 |
| 484 Yields: | 511 Yields: |
| 485 Output line formatted per _BuildTmpOutputLine. | 512 Output line formatted per _BuildTmpOutputLine. |
| 486 """ | 513 """ |
| 487 if cls.recursion_requested: | 514 base_url = StorageUrlFromString(base_url_str) |
| 488 wildcard = '%s/**' % base_url_str.rstrip('/\\') | 515 if base_url.scheme == 'file' and not cls.recursion_requested: |
| 516 iterator = _LocalDirIterator(base_url) |
| 489 else: | 517 else: |
| 490 wildcard = '%s/*' % base_url_str.rstrip('/\\') | 518 if cls.recursion_requested: |
| 519 wildcard = '%s/**' % base_url_str.rstrip('/\\') |
| 520 else: |
| 521 wildcard = '%s/*' % base_url_str.rstrip('/\\') |
| 522 iterator = CreateWildcardIterator( |
| 523 wildcard, gsutil_api, debug=cls.debug, |
| 524 project_id=cls.project_id).IterObjects( |
| 525 # Request just the needed fields, to reduce bandwidth usage. |
| 526 bucket_listing_fields=['crc32c', 'md5Hash', 'name', 'size']) |
| 527 |
| 491 i = 0 | 528 i = 0 |
| 492 for blr in CreateWildcardIterator( | 529 for blr in iterator: |
| 493 wildcard, gsutil_api, debug=cls.debug, | |
| 494 project_id=cls.project_id).IterObjects( | |
| 495 # Request just the needed fields, to reduce bandwidth usage. | |
| 496 bucket_listing_fields=['crc32c', 'md5Hash', 'name', 'size']): | |
| 497 # Various GUI tools (like the GCS web console) create placeholder objects | 530 # Various GUI tools (like the GCS web console) create placeholder objects |
| 498 # ending with '/' when the user creates an empty directory. Normally these | 531 # ending with '/' when the user creates an empty directory. Normally these |
| 499 # tools should delete those placeholders once objects have been written | 532 # tools should delete those placeholders once objects have been written |
| 500 # "under" the directory, but sometimes the placeholders are left around. | 533 # "under" the directory, but sometimes the placeholders are left around. |
| 501 # We need to filter them out here, otherwise if the user tries to rsync | 534 # We need to filter them out here, otherwise if the user tries to rsync |
| 502 # from GCS to a local directory it will result in a directory/file | 535 # from GCS to a local directory it will result in a directory/file |
| 503 # conflict (e.g., trying to download an object called "mydata/" where the | 536 # conflict (e.g., trying to download an object called "mydata/" where the |
| 504 # local directory "mydata" exists). | 537 # local directory "mydata" exists). |
| 505 url = blr.storage_url | 538 url = blr.storage_url |
| 506 if IsCloudSubdirPlaceholder(url, blr=blr): | 539 if IsCloudSubdirPlaceholder(url, blr=blr): |
| 507 cls.logger.info('Skipping cloud sub-directory placeholder object (%s) ' | 540 # We used to output the message 'Skipping cloud sub-directory placeholder |
| 508 'because such objects aren\'t needed in (and would ' | 541 # object...' but we no longer do so because it caused customer confusion. |
| 509 'interfere with) directories in the local file system', | |
| 510 url) | |
| 511 continue | 542 continue |
| 512 if (cls.exclude_symlinks and url.IsFileUrl() | 543 if (cls.exclude_symlinks and url.IsFileUrl() |
| 513 and os.path.islink(url.object_name)): | 544 and os.path.islink(url.object_name)): |
| 514 continue | 545 continue |
| 515 if cls.exclude_pattern: | 546 if cls.exclude_pattern: |
| 516 str_to_check = url.url_string[len(base_url_str):] | 547 str_to_check = url.url_string[len(base_url_str):] |
| 517 if str_to_check.startswith(url.delim): | 548 if str_to_check.startswith(url.delim): |
| 518 str_to_check = str_to_check[1:] | 549 str_to_check = str_to_check[1:] |
| 519 if cls.exclude_pattern.match(str_to_check): | 550 if cls.exclude_pattern.match(str_to_check): |
| 520 continue | 551 continue |
| (...skipping 502 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1023 elif o == '-x': | 1054 elif o == '-x': |
| 1024 if not a: | 1055 if not a: |
| 1025 raise CommandException('Invalid blank exclude filter') | 1056 raise CommandException('Invalid blank exclude filter') |
| 1026 try: | 1057 try: |
| 1027 self.exclude_pattern = re.compile(a) | 1058 self.exclude_pattern = re.compile(a) |
| 1028 except re.error: | 1059 except re.error: |
| 1029 raise CommandException('Invalid exclude filter (%s)' % a) | 1060 raise CommandException('Invalid exclude filter (%s)' % a) |
| 1030 return CreateCopyHelperOpts( | 1061 return CreateCopyHelperOpts( |
| 1031 preserve_acl=preserve_acl, | 1062 preserve_acl=preserve_acl, |
| 1032 skip_unsupported_objects=self.skip_unsupported_objects) | 1063 skip_unsupported_objects=self.skip_unsupported_objects) |
| OLD | NEW |