| OLD | NEW |
| 1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
| 2 # Copyright 2011 Google Inc. All Rights Reserved. | 2 # Copyright 2011 Google Inc. All Rights Reserved. |
| 3 # Copyright 2011, Nexenta Systems Inc. | 3 # Copyright 2011, Nexenta Systems Inc. |
| 4 # | 4 # |
| 5 # Licensed under the Apache License, Version 2.0 (the "License"); | 5 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 # you may not use this file except in compliance with the License. | 6 # you may not use this file except in compliance with the License. |
| 7 # You may obtain a copy of the License at | 7 # You may obtain a copy of the License at |
| 8 # | 8 # |
| 9 # http://www.apache.org/licenses/LICENSE-2.0 | 9 # http://www.apache.org/licenses/LICENSE-2.0 |
| 10 # | 10 # |
| 11 # Unless required by applicable law or agreed to in writing, software | 11 # Unless required by applicable law or agreed to in writing, software |
| 12 # distributed under the License is distributed on an "AS IS" BASIS, | 12 # distributed under the License is distributed on an "AS IS" BASIS, |
| 13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 # See the License for the specific language governing permissions and | 14 # See the License for the specific language governing permissions and |
| 15 # limitations under the License. | 15 # limitations under the License. |
| 16 """Implementation of Unix-like cp command for cloud storage providers.""" | 16 """Implementation of Unix-like cp command for cloud storage providers.""" |
| 17 | 17 |
| 18 from __future__ import absolute_import | 18 from __future__ import absolute_import |
| 19 | 19 |
| 20 import os | 20 import os |
| 21 import time | 21 import time |
| 22 import traceback | 22 import traceback |
| 23 | 23 |
| 24 from gslib import copy_helper | 24 from gslib import copy_helper |
| 25 from gslib.cat_helper import CatHelper | 25 from gslib.cat_helper import CatHelper |
| 26 from gslib.cloud_api import AccessDeniedException | |
| 27 from gslib.cloud_api import NotFoundException | |
| 28 from gslib.command import Command | 26 from gslib.command import Command |
| 29 from gslib.command_argument import CommandArgument | 27 from gslib.command_argument import CommandArgument |
| 30 from gslib.commands.compose import MAX_COMPONENT_COUNT | 28 from gslib.commands.compose import MAX_COMPONENT_COUNT |
| 31 from gslib.copy_helper import CreateCopyHelperOpts | 29 from gslib.copy_helper import CreateCopyHelperOpts |
| 32 from gslib.copy_helper import ItemExistsError | 30 from gslib.copy_helper import ItemExistsError |
| 33 from gslib.copy_helper import Manifest | 31 from gslib.copy_helper import Manifest |
| 34 from gslib.copy_helper import PARALLEL_UPLOAD_TEMP_NAMESPACE | 32 from gslib.copy_helper import PARALLEL_UPLOAD_TEMP_NAMESPACE |
| 35 from gslib.copy_helper import SkipUnsupportedObjectError | 33 from gslib.copy_helper import SkipUnsupportedObjectError |
| 36 from gslib.cs_api_map import ApiSelector | 34 from gslib.cs_api_map import ApiSelector |
| 37 from gslib.exception import CommandException | 35 from gslib.exception import CommandException |
| (...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 202 in the cloud, which may take some time. Such operations can be resumed with | 200 in the cloud, which may take some time. Such operations can be resumed with |
| 203 the same command if they are interrupted, so long as the command parameters | 201 the same command if they are interrupted, so long as the command parameters |
| 204 are identical. | 202 are identical. |
| 205 | 203 |
| 206 Note that by default, the gsutil cp command does not copy the object | 204 Note that by default, the gsutil cp command does not copy the object |
| 207 ACL to the new object, and instead will use the default bucket ACL (see | 205 ACL to the new object, and instead will use the default bucket ACL (see |
| 208 "gsutil help defacl"). You can override this behavior with the -p | 206 "gsutil help defacl"). You can override this behavior with the -p |
| 209 option (see OPTIONS below). | 207 option (see OPTIONS below). |
| 210 | 208 |
| 211 One additional note about copying in the cloud: If the destination bucket has | 209 One additional note about copying in the cloud: If the destination bucket has |
| 212 versioning enabled, gsutil cp will copy all versions of the source object(s). | 210 versioning enabled, gsutil cp will by default copy only live versions of the |
| 213 For example: | 211 source object(s). For example: |
| 214 | 212 |
| 215 gsutil cp gs://bucket1/obj gs://bucket2 | 213 gsutil cp gs://bucket1/obj gs://bucket2 |
| 216 | 214 |
| 217 will cause all versions of gs://bucket1/obj to be copied to gs://bucket2. | 215 will cause only the single live version of gs://bucket1/obj to be copied |
| 216 to gs://bucket2, even if there are archived versions of gs://bucket1/obj. To |
| 217 also copy archived versions, use the -A flag: |
| 218 |
| 219 gsutil cp -A gs://bucket1/obj gs://bucket2 |
| 220 |
| 221 The gsutil -m flag is disallowed when using the cp -A flag, to ensure that |
| 222 version ordering is preserved. |
| 218 """ | 223 """ |
| 219 | 224 |
| 220 _CHECKSUM_VALIDATION_TEXT = """ | 225 _CHECKSUM_VALIDATION_TEXT = """ |
| 221 <B>CHECKSUM VALIDATION</B> | 226 <B>CHECKSUM VALIDATION</B> |
| 222 At the end of every upload or download the gsutil cp command validates that | 227 At the end of every upload or download the gsutil cp command validates that |
| 223 the checksum it computes for the source file/object matches the checksum | 228 the checksum it computes for the source file/object matches the checksum |
| 224 the service computes. If the checksums do not match, gsutil will delete the | 229 the service computes. If the checksums do not match, gsutil will delete the |
| 225 corrupted object and print a warning message. This very rarely happens, but | 230 corrupted object and print a warning message. This very rarely happens, but |
| 226 if it does, please contact gs-team@google.com. | 231 if it does, please contact gs-team@google.com. |
| 227 | 232 |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 293 MiB. You do not need to specify any special command line options to make this | 298 MiB. You do not need to specify any special command line options to make this |
| 294 happen. If your upload is interrupted you can restart the upload by running | 299 happen. If your upload is interrupted you can restart the upload by running |
| 295 the same cp command that you ran to start the upload. Until the upload | 300 the same cp command that you ran to start the upload. Until the upload |
| 296 has completed successfully, it will not be visible at the destination object | 301 has completed successfully, it will not be visible at the destination object |
| 297 and will not replace any existing object the upload is intended to overwrite. | 302 and will not replace any existing object the upload is intended to overwrite. |
| 298 (However, see the section on PARALLEL COMPOSITE UPLOADS, which may leave | 303 (However, see the section on PARALLEL COMPOSITE UPLOADS, which may leave |
| 299 temporary component objects in place during the upload process.) | 304 temporary component objects in place during the upload process.) |
| 300 | 305 |
| 301 Similarly, gsutil automatically performs resumable downloads (using HTTP | 306 Similarly, gsutil automatically performs resumable downloads (using HTTP |
| 302 standard Range GET operations) whenever you use the cp command, unless the | 307 standard Range GET operations) whenever you use the cp command, unless the |
| 303 destination is a stream or null. In this case the partially downloaded file | 308 destination is a stream or null. In this case, a partially downloaded |
| 304 will be visible as soon as it starts being written. Thus, before you attempt | 309 temporary file will be visible in the destination directory. Upon completion, |
| 305 to use any files downloaded by gsutil you should make sure the download | 310 the original file is deleted and overwritten with the downloaded contents. |
| 306 completed successfully, by checking the exit status from the gsutil command. | |
| 307 This can be done in a bash script, for example, by doing: | |
| 308 | 311 |
| 309 gsutil cp gs://your-bucket/your-object ./local-file | 312 Resumable uploads and downloads store some state information in a file |
| 310 if [ "$status" -ne "0" ] ; then | |
| 311 << Code that handles failures >> | |
| 312 fi | |
| 313 | |
| 314 Resumable uploads and downloads store some state information in a file | |
| 315 in ~/.gsutil named by the destination object or file. If you attempt to | 313 in ~/.gsutil named by the destination object or file. If you attempt to |
| 316 resume a transfer from a machine with a different directory, the transfer | 314 resume a transfer from a machine with a different directory, the transfer |
| 317 will start over from scratch. | 315 will start over from scratch. |
| 318 | 316 |
| 319 See also "gsutil help prod" for details on using resumable transfers | 317 See also "gsutil help prod" for details on using resumable transfers |
| 320 in production. | 318 in production. |
| 321 """ | 319 """ |
| 322 | 320 |
| 323 _STREAMING_TRANSFERS_TEXT = """ | 321 _STREAMING_TRANSFERS_TEXT = """ |
| 324 <B>STREAMING TRANSFERS</B> | 322 <B>STREAMING TRANSFERS</B> |
| (...skipping 10 matching lines...) Expand all Loading... |
| 335 (say, more than 100 MiB) it is recommended to write the data to a local file | 333 (say, more than 100 MiB) it is recommended to write the data to a local file |
| 336 and then copy that file to the cloud rather than streaming it (and similarly | 334 and then copy that file to the cloud rather than streaming it (and similarly |
| 337 for large downloads). | 335 for large downloads). |
| 338 | 336 |
| 339 WARNING: When performing streaming transfers gsutil does not compute a | 337 WARNING: When performing streaming transfers gsutil does not compute a |
| 340 checksum of the uploaded or downloaded data. Therefore, we recommend that | 338 checksum of the uploaded or downloaded data. Therefore, we recommend that |
| 341 users either perform their own validation of the data or use non-streaming | 339 users either perform their own validation of the data or use non-streaming |
| 342 transfers (which perform integrity checking automatically). | 340 transfers (which perform integrity checking automatically). |
| 343 """ | 341 """ |
| 344 | 342 |
| 343 _SLICED_OBJECT_DOWNLOADS_TEXT = """ |
| 344 <B>SLICED OBJECT DOWNLOADS</B> |
| 345 gsutil automatically uses HTTP Range GET requests to perform "sliced" |
| 346 downloads in parallel for downloads of large objects. This means that, if |
| 347 enabled, disk space for the temporary download destination file will be |
| 348 pre-allocated and byte ranges (slices) within the file will be downloaded in |
| 349 parallel. Once all slices have completed downloading, the temporary file will |
| 350 be renamed to the destination file. No additional local disk space is |
| 351 required for this operation. |
| 352 |
| 353 This feature is only available for Google Cloud Storage objects because it |
| 354 requires a fast composable checksum that can be used to verify the data |
| 355 integrity of the slices. Thus, using sliced object downloads also requires a |
| 356 compiled crcmod (see "gsutil help crcmod") on the machine performing the |
| 357 download. If compiled crcmod is not available, normal download will instead |
| 358 be used. |
| 359 |
| 360 Note: since sliced object downloads cause multiple writes to occur at various |
| 361 locations on disk, this can degrade performance for disks with slow seek |
| 362 times, especially for large numbers of slices. While the default number of |
| 363 slices is small to avoid this, sliced object download can be completely |
| 364 disabled by setting the "sliced_object_download_threshold" variable in the |
| 365 .boto config file to 0. |
| 366 """ |
| 367 |
| 345 _PARALLEL_COMPOSITE_UPLOADS_TEXT = """ | 368 _PARALLEL_COMPOSITE_UPLOADS_TEXT = """ |
| 346 <B>PARALLEL COMPOSITE UPLOADS</B> | 369 <B>PARALLEL COMPOSITE UPLOADS</B> |
| 347 gsutil can automatically use | 370 gsutil can automatically use |
| 348 `object composition <https://developers.google.com/storage/docs/composite-obje
cts>`_ | 371 `object composition <https://developers.google.com/storage/docs/composite-obje
cts>`_ |
| 349 to perform uploads in parallel for large, local files being uploaded to Google | 372 to perform uploads in parallel for large, local files being uploaded to Google |
| 350 Cloud Storage. This means that, if enabled (see next paragraph), a large file | 373 Cloud Storage. This means that, if enabled (see next paragraph), a large file |
| 351 will be split into component pieces that will be uploaded in parallel. Those | 374 will be split into component pieces that will be uploaded in parallel. Those |
| 352 components will then be composed in the cloud, and the temporary components in | 375 components will then be composed in the cloud, and the temporary components in |
| 353 the cloud will be deleted after successful composition. No additional local | 376 the cloud will be deleted after successful composition. No additional local |
| 354 disk space is required for this operation. | 377 disk space is required for this operation. |
| 355 | 378 |
| 356 Using parallel composite uploads presents a tradeoff between upload | 379 Using parallel composite uploads presents a tradeoff between upload |
| 357 performance and download configuration: If you enable parallel composite | 380 performance and download configuration: If you enable parallel composite |
| 358 uploads your uploads will run faster, but someone will need to install a | 381 uploads your uploads will run faster, but someone will need to install a |
| 359 compiled crcmod (see "gsutil help crcmod") on every machine where objects are | 382 compiled crcmod (see "gsutil help crcmod") on every machine where objects are |
| 360 downloaded by gsutil or other Python applications. For some distributions this | 383 downloaded by gsutil or other Python applications. For some distributions this |
| 361 is easy (e.g., it comes pre-installed on MacOS), but in some cases users have | 384 is easy (e.g., it comes pre-installed on MacOS), but in some cases users have |
| 362 found it difficult. Because of this at present parallel composite uploads are | 385 found it difficult. Because of this at present parallel composite uploads are |
| 363 disabled by default. Google is actively working with a number of the Linux | 386 disabled by default. Google is actively working with a number of the Linux |
| 364 distributions to get crcmod included with the stock distribution. Once that is | 387 distributions to get crcmod included with the stock distribution. Once that is |
| 365 done we will re-enable parallel composite uploads by default in gsutil. | 388 done we will re-enable parallel composite uploads by default in gsutil. |
| 366 | 389 |
| 390 Parallel composite uploads should not be used with NEARLINE storage |
| 391 class buckets, as doing this would incur an early deletion charge for each |
| 392 component object. |
| 393 |
| 367 To try parallel composite uploads you can run the command: | 394 To try parallel composite uploads you can run the command: |
| 368 | 395 |
| 369 gsutil -o GSUtil:parallel_composite_upload_threshold=150M cp bigfile gs://yo
ur-bucket | 396 gsutil -o GSUtil:parallel_composite_upload_threshold=150M cp bigfile gs://yo
ur-bucket |
| 370 | 397 |
| 371 where bigfile is larger than 150 MiB. When you do this notice that the upload | 398 where bigfile is larger than 150 MiB. When you do this notice that the upload |
| 372 progress indicator continuously updates for several different uploads at once | 399 progress indicator continuously updates for several different uploads at once |
| 373 (corresponding to each of the sections of the file being uploaded in | 400 (corresponding to each of the sections of the file being uploaded in |
| 374 parallel), until the parallel upload completes. If you then want to enable | 401 parallel), until the parallel upload completes. If you then want to enable |
| 375 parallel composite uploads for all of your future uploads (notwithstanding the | 402 parallel composite uploads for all of your future uploads (notwithstanding the |
| 376 caveats mentioned earlier), you can uncomment and set the | 403 caveats mentioned earlier), you can uncomment and set the |
| (...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 457 export TMPDIR=/some/directory | 484 export TMPDIR=/some/directory |
| 458 | 485 |
| 459 On Windows 7 you can change the TMPDIR environment variable from Start -> | 486 On Windows 7 you can change the TMPDIR environment variable from Start -> |
| 460 Computer -> System -> Advanced System Settings -> Environment Variables. | 487 Computer -> System -> Advanced System Settings -> Environment Variables. |
| 461 You need to reboot after making this change for it to take effect. (Rebooting | 488 You need to reboot after making this change for it to take effect. (Rebooting |
| 462 is not necessary after running the export command on Linux and MacOS.) | 489 is not necessary after running the export command on Linux and MacOS.) |
| 463 """ | 490 """ |
| 464 | 491 |
| 465 _OPTIONS_TEXT = """ | 492 _OPTIONS_TEXT = """ |
| 466 <B>OPTIONS</B> | 493 <B>OPTIONS</B> |
| 467 -a canned_acl Sets named canned_acl when uploaded objects created. See | 494 -a canned_acl Sets named canned_acl when uploaded objects created. See |
| 468 'gsutil help acls' for further details. | 495 'gsutil help acls' for further details. |
| 496 |
| 497 -A Copy all source versions from source buckets/folders. |
| 498 If not set, only the live version of each source object is |
| 499 copied. Note: this option is only useful when the destination |
| 500 bucket has versioning enabled. |
| 469 | 501 |
| 470 -c If an error occurs, continue to attempt to copy the remaining | 502 -c If an error occurs, continue to attempt to copy the remaining |
| 471 files. If any copies were unsuccessful, gsutil's exit status | 503 files. If any copies were unsuccessful, gsutil's exit status |
| 472 will be non-zero even if this flag is set. This option is | 504 will be non-zero even if this flag is set. This option is |
| 473 implicitly set when running "gsutil -m cp...". Note: -c only | 505 implicitly set when running "gsutil -m cp...". Note: -c only |
| 474 applies to the actual copying operation. If an error occurs | 506 applies to the actual copying operation. If an error occurs |
| 475 while iterating over the files in the local directory (e.g., | 507 while iterating over the files in the local directory (e.g., |
| 476 invalid Unicode file name) gsutil will print an error message | 508 invalid Unicode file name) gsutil will print an error message |
| 477 and abort. | 509 and abort. |
| 478 | 510 |
| (...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 566 together. | 598 together. |
| 567 | 599 |
| 568 -R, -r Causes directories, buckets, and bucket subdirectories to be | 600 -R, -r Causes directories, buckets, and bucket subdirectories to be |
| 569 copied recursively. If you neglect to use this option for | 601 copied recursively. If you neglect to use this option for |
| 570 an upload, gsutil will copy any files it finds and skip any | 602 an upload, gsutil will copy any files it finds and skip any |
| 571 directories. Similarly, neglecting to specify -r for a download | 603 directories. Similarly, neglecting to specify -r for a download |
| 572 will cause gsutil to copy any objects at the current bucket | 604 will cause gsutil to copy any objects at the current bucket |
| 573 directory level, and skip any subdirectories. | 605 directory level, and skip any subdirectories. |
| 574 | 606 |
| 575 -U Skip objects with unsupported object types instead of failing. | 607 -U Skip objects with unsupported object types instead of failing. |
| 576 Unsupported object types are s3 glacier objects. | 608 Unsupported object types are Amazon S3 objects in the GLACIER |
| 609 storage class. |
| 577 | 610 |
| 578 -v Requests that the version-specific URL for each uploaded object | 611 -v Requests that the version-specific URL for each uploaded object |
| 579 be printed. Given this URL you can make future upload requests | 612 be printed. Given this URL you can make future upload requests |
| 580 that are safe in the face of concurrent updates, because Google | 613 that are safe in the face of concurrent updates, because Google |
| 581 Cloud Storage will refuse to perform the update if the current | 614 Cloud Storage will refuse to perform the update if the current |
| 582 object version doesn't match the version-specific URL. See | 615 object version doesn't match the version-specific URL. See |
| 583 'gsutil help versions' for more details. | 616 'gsutil help versions' for more details. |
| 584 | 617 |
| 585 -z <ext,...> Applies gzip content-encoding to file uploads with the given | 618 -z <ext,...> Applies gzip content-encoding to file uploads with the given |
| 586 extensions. This is useful when uploading files with | 619 extensions. This is useful when uploading files with |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 619 | 652 |
| 620 _DETAILED_HELP_TEXT = '\n\n'.join([_SYNOPSIS_TEXT, | 653 _DETAILED_HELP_TEXT = '\n\n'.join([_SYNOPSIS_TEXT, |
| 621 _DESCRIPTION_TEXT, | 654 _DESCRIPTION_TEXT, |
| 622 _NAME_CONSTRUCTION_TEXT, | 655 _NAME_CONSTRUCTION_TEXT, |
| 623 _SUBDIRECTORIES_TEXT, | 656 _SUBDIRECTORIES_TEXT, |
| 624 _COPY_IN_CLOUD_TEXT, | 657 _COPY_IN_CLOUD_TEXT, |
| 625 _CHECKSUM_VALIDATION_TEXT, | 658 _CHECKSUM_VALIDATION_TEXT, |
| 626 _RETRY_HANDLING_TEXT, | 659 _RETRY_HANDLING_TEXT, |
| 627 _RESUMABLE_TRANSFERS_TEXT, | 660 _RESUMABLE_TRANSFERS_TEXT, |
| 628 _STREAMING_TRANSFERS_TEXT, | 661 _STREAMING_TRANSFERS_TEXT, |
| 662 _SLICED_OBJECT_DOWNLOADS_TEXT, |
| 629 _PARALLEL_COMPOSITE_UPLOADS_TEXT, | 663 _PARALLEL_COMPOSITE_UPLOADS_TEXT, |
| 630 _CHANGING_TEMP_DIRECTORIES_TEXT, | 664 _CHANGING_TEMP_DIRECTORIES_TEXT, |
| 631 _OPTIONS_TEXT]) | 665 _OPTIONS_TEXT]) |
| 632 | 666 |
| 633 | 667 |
| 634 CP_SUB_ARGS = 'a:cDeIL:MNnprRtUvz:' | 668 CP_SUB_ARGS = 'a:AcDeIL:MNnprRtUvz:' |
| 635 | 669 |
| 636 | 670 |
| 637 def _CopyFuncWrapper(cls, args, thread_state=None): | 671 def _CopyFuncWrapper(cls, args, thread_state=None): |
| 638 cls.CopyFunc(args, thread_state=thread_state) | 672 cls.CopyFunc(args, thread_state=thread_state) |
| 639 | 673 |
| 640 | 674 |
| 641 def _CopyExceptionHandler(cls, e): | 675 def _CopyExceptionHandler(cls, e): |
| 642 """Simple exception handler to allow post-completion status.""" | 676 """Simple exception handler to allow post-completion status.""" |
| 643 cls.logger.error(str(e)) | 677 cls.logger.error(str(e)) |
| 644 cls.op_failure_count += 1 | 678 cls.op_failure_count += 1 |
| (...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 725 | 759 |
| 726 # Various GUI tools (like the GCS web console) create placeholder objects | 760 # Various GUI tools (like the GCS web console) create placeholder objects |
| 727 # ending with '/' when the user creates an empty directory. Normally these | 761 # ending with '/' when the user creates an empty directory. Normally these |
| 728 # tools should delete those placeholders once objects have been written | 762 # tools should delete those placeholders once objects have been written |
| 729 # "under" the directory, but sometimes the placeholders are left around. We | 763 # "under" the directory, but sometimes the placeholders are left around. We |
| 730 # need to filter them out here, otherwise if the user tries to rsync from | 764 # need to filter them out here, otherwise if the user tries to rsync from |
| 731 # GCS to a local directory it will result in a directory/file conflict | 765 # GCS to a local directory it will result in a directory/file conflict |
| 732 # (e.g., trying to download an object called "mydata/" where the local | 766 # (e.g., trying to download an object called "mydata/" where the local |
| 733 # directory "mydata" exists). | 767 # directory "mydata" exists). |
| 734 if IsCloudSubdirPlaceholder(exp_src_url): | 768 if IsCloudSubdirPlaceholder(exp_src_url): |
| 735 self.logger.info('Skipping cloud sub-directory placeholder object (%s) ' | 769 # We used to output the message 'Skipping cloud sub-directory placeholder |
| 736 'because such objects aren\'t needed in (and would ' | 770 # object...' but we no longer do so because it caused customer confusion. |
| 737 'interfere with) directories in the local file system', | |
| 738 exp_src_url) | |
| 739 return | 771 return |
| 740 | 772 |
| 741 if copy_helper_opts.use_manifest and self.manifest.WasSuccessful( | 773 if copy_helper_opts.use_manifest and self.manifest.WasSuccessful( |
| 742 exp_src_url.url_string): | 774 exp_src_url.url_string): |
| 743 return | 775 return |
| 744 | 776 |
| 745 if copy_helper_opts.perform_mv: | 777 if copy_helper_opts.perform_mv: |
| 746 if name_expansion_result.names_container: | 778 if name_expansion_result.names_container: |
| 747 # Use recursion_requested when performing name expansion for the | 779 # Use recursion_requested when performing name expansion for the |
| 748 # directory mv case so we can determine if any of the source URLs are | 780 # directory mv case so we can determine if any of the source URLs are |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 782 elapsed_time = bytes_transferred = 0 | 814 elapsed_time = bytes_transferred = 0 |
| 783 try: | 815 try: |
| 784 if copy_helper_opts.use_manifest: | 816 if copy_helper_opts.use_manifest: |
| 785 self.manifest.Initialize( | 817 self.manifest.Initialize( |
| 786 exp_src_url.url_string, dst_url.url_string) | 818 exp_src_url.url_string, dst_url.url_string) |
| 787 (elapsed_time, bytes_transferred, result_url, md5) = ( | 819 (elapsed_time, bytes_transferred, result_url, md5) = ( |
| 788 copy_helper.PerformCopy( | 820 copy_helper.PerformCopy( |
| 789 self.logger, exp_src_url, dst_url, gsutil_api, | 821 self.logger, exp_src_url, dst_url, gsutil_api, |
| 790 self, _CopyExceptionHandler, allow_splitting=True, | 822 self, _CopyExceptionHandler, allow_splitting=True, |
| 791 headers=self.headers, manifest=self.manifest, | 823 headers=self.headers, manifest=self.manifest, |
| 792 gzip_exts=self.gzip_exts, test_method=self.test_method)) | 824 gzip_exts=self.gzip_exts)) |
| 793 if copy_helper_opts.use_manifest: | 825 if copy_helper_opts.use_manifest: |
| 794 if md5: | 826 if md5: |
| 795 self.manifest.Set(exp_src_url.url_string, 'md5', md5) | 827 self.manifest.Set(exp_src_url.url_string, 'md5', md5) |
| 796 self.manifest.SetResult( | 828 self.manifest.SetResult( |
| 797 exp_src_url.url_string, bytes_transferred, 'OK') | 829 exp_src_url.url_string, bytes_transferred, 'OK') |
| 798 if copy_helper_opts.print_ver: | 830 if copy_helper_opts.print_ver: |
| 799 # Some cases don't return a version-specific URL (e.g., if destination | 831 # Some cases don't return a version-specific URL (e.g., if destination |
| 800 # is a file). | 832 # is a file). |
| 801 self.logger.info('Created: %s', result_url) | 833 self.logger.info('Created: %s', result_url) |
| 802 except ItemExistsError: | 834 except ItemExistsError: |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 865 url_strs = StdinIterator() | 897 url_strs = StdinIterator() |
| 866 else: | 898 else: |
| 867 if len(self.args) < 2: | 899 if len(self.args) < 2: |
| 868 raise CommandException('Wrong number of arguments for "cp" command.') | 900 raise CommandException('Wrong number of arguments for "cp" command.') |
| 869 url_strs = self.args[:-1] | 901 url_strs = self.args[:-1] |
| 870 | 902 |
| 871 (self.exp_dst_url, self.have_existing_dst_container) = ( | 903 (self.exp_dst_url, self.have_existing_dst_container) = ( |
| 872 copy_helper.ExpandUrlToSingleBlr(self.args[-1], self.gsutil_api, | 904 copy_helper.ExpandUrlToSingleBlr(self.args[-1], self.gsutil_api, |
| 873 self.debug, self.project_id)) | 905 self.debug, self.project_id)) |
| 874 | 906 |
| 875 # If the destination bucket has versioning enabled iterate with | |
| 876 # all_versions=True. That way we'll copy all versions if the source bucket | |
| 877 # is versioned; and by leaving all_versions=False if the destination bucket | |
| 878 # has versioning disabled we will avoid copying old versions all to the same | |
| 879 # un-versioned destination object. | |
| 880 all_versions = False | |
| 881 try: | |
| 882 bucket = self._GetBucketWithVersioningConfig(self.exp_dst_url) | |
| 883 if bucket and bucket.versioning and bucket.versioning.enabled: | |
| 884 all_versions = True | |
| 885 except AccessDeniedException: | |
| 886 # This happens (in the XML API only) if the user doesn't have OWNER access | |
| 887 # on the bucket (needed to check if versioning is enabled). In this case | |
| 888 # fall back to copying all versions (which can be inefficient for the | |
| 889 # reason noted in the comment above). We don't try to warn the user | |
| 890 # because that would result in false positive warnings (since we can't | |
| 891 # check if versioning is enabled on the destination bucket). | |
| 892 # | |
| 893 # For JSON, we will silently not return versioning if we don't have | |
| 894 # access. | |
| 895 all_versions = True | |
| 896 | |
| 897 name_expansion_iterator = NameExpansionIterator( | 907 name_expansion_iterator = NameExpansionIterator( |
| 898 self.command_name, self.debug, | 908 self.command_name, self.debug, |
| 899 self.logger, self.gsutil_api, url_strs, | 909 self.logger, self.gsutil_api, url_strs, |
| 900 self.recursion_requested or copy_helper_opts.perform_mv, | 910 self.recursion_requested or copy_helper_opts.perform_mv, |
| 901 project_id=self.project_id, all_versions=all_versions, | 911 project_id=self.project_id, all_versions=self.all_versions, |
| 902 continue_on_error=self.continue_on_error or self.parallel_operations) | 912 continue_on_error=self.continue_on_error or self.parallel_operations) |
| 903 | 913 |
| 904 # Use a lock to ensure accurate statistics in the face of | 914 # Use a lock to ensure accurate statistics in the face of |
| 905 # multi-threading/multi-processing. | 915 # multi-threading/multi-processing. |
| 906 self.stats_lock = CreateLock() | 916 self.stats_lock = CreateLock() |
| 907 | 917 |
| 908 # Tracks if any copies failed. | 918 # Tracks if any copies failed. |
| 909 self.op_failure_count = 0 | 919 self.op_failure_count = 0 |
| 910 | 920 |
| 911 # Start the clock. | 921 # Start the clock. |
| (...skipping 29 matching lines...) Expand all Loading... |
| 941 if self.debug == 3: | 951 if self.debug == 3: |
| 942 # Note that this only counts the actual GET and PUT bytes for the copy | 952 # Note that this only counts the actual GET and PUT bytes for the copy |
| 943 # - not any transfers for doing wildcard expansion, the initial | 953 # - not any transfers for doing wildcard expansion, the initial |
| 944 # HEAD/GET request performed to get the object metadata, etc. | 954 # HEAD/GET request performed to get the object metadata, etc. |
| 945 if self.total_bytes_transferred != 0: | 955 if self.total_bytes_transferred != 0: |
| 946 self.logger.info( | 956 self.logger.info( |
| 947 'Total bytes copied=%d, total elapsed time=%5.3f secs (%sps)', | 957 'Total bytes copied=%d, total elapsed time=%5.3f secs (%sps)', |
| 948 self.total_bytes_transferred, self.total_elapsed_time, | 958 self.total_bytes_transferred, self.total_elapsed_time, |
| 949 MakeHumanReadable(self.total_bytes_per_second)) | 959 MakeHumanReadable(self.total_bytes_per_second)) |
| 950 if self.op_failure_count: | 960 if self.op_failure_count: |
| 951 plural_str = 's' if self.op_failure_count else '' | 961 plural_str = 's' if self.op_failure_count > 1 else '' |
| 952 raise CommandException('%d file%s/object%s could not be transferred.' % ( | 962 raise CommandException('%d file%s/object%s could not be transferred.' % ( |
| 953 self.op_failure_count, plural_str, plural_str)) | 963 self.op_failure_count, plural_str, plural_str)) |
| 954 | 964 |
| 955 return 0 | 965 return 0 |
| 956 | 966 |
| 957 def _ParseOpts(self): | 967 def _ParseOpts(self): |
| 958 perform_mv = False | 968 perform_mv = False |
| 959 # exclude_symlinks is handled by Command parent class, so save in Command | 969 # exclude_symlinks is handled by Command parent class, so save in Command |
| 960 # state rather than CopyHelperOpts. | 970 # state rather than CopyHelperOpts. |
| 961 self.exclude_symlinks = False | 971 self.exclude_symlinks = False |
| 962 no_clobber = False | 972 no_clobber = False |
| 963 # continue_on_error is handled by Command parent class, so save in Command | 973 # continue_on_error is handled by Command parent class, so save in Command |
| 964 # state rather than CopyHelperOpts. | 974 # state rather than CopyHelperOpts. |
| 965 self.continue_on_error = False | 975 self.continue_on_error = False |
| 966 daisy_chain = False | 976 daisy_chain = False |
| 967 read_args_from_stdin = False | 977 read_args_from_stdin = False |
| 968 print_ver = False | 978 print_ver = False |
| 969 use_manifest = False | 979 use_manifest = False |
| 970 preserve_acl = False | 980 preserve_acl = False |
| 971 canned_acl = None | 981 canned_acl = None |
| 972 # canned_acl is handled by a helper function in parent | 982 # canned_acl is handled by a helper function in parent |
| 973 # Command class, so save in Command state rather than CopyHelperOpts. | 983 # Command class, so save in Command state rather than CopyHelperOpts. |
| 974 self.canned = None | 984 self.canned = None |
| 975 | 985 |
| 986 self.all_versions = False |
| 987 |
| 976 self.skip_unsupported_objects = False | 988 self.skip_unsupported_objects = False |
| 977 | 989 |
| 978 # Files matching these extensions should be gzipped before uploading. | 990 # Files matching these extensions should be gzipped before uploading. |
| 979 self.gzip_exts = [] | 991 self.gzip_exts = [] |
| 980 | 992 |
| 981 test_callback_file = None | 993 test_callback_file = None |
| 982 | 994 |
| 983 # self.recursion_requested initialized in command.py (so can be checked | 995 # self.recursion_requested initialized in command.py (so can be checked |
| 984 # in parent class for all commands). | 996 # in parent class for all commands). |
| 985 self.manifest = None | 997 self.manifest = None |
| 986 if self.sub_opts: | 998 if self.sub_opts: |
| 987 for o, a in self.sub_opts: | 999 for o, a in self.sub_opts: |
| 988 if o == '-a': | 1000 if o == '-a': |
| 989 canned_acl = a | 1001 canned_acl = a |
| 990 self.canned = True | 1002 self.canned = True |
| 1003 if o == '-A': |
| 1004 self.all_versions = True |
| 991 if o == '-c': | 1005 if o == '-c': |
| 992 self.continue_on_error = True | 1006 self.continue_on_error = True |
| 993 elif o == '-D': | 1007 elif o == '-D': |
| 994 daisy_chain = True | 1008 daisy_chain = True |
| 995 elif o == '-e': | 1009 elif o == '-e': |
| 996 self.exclude_symlinks = True | 1010 self.exclude_symlinks = True |
| 997 elif o == '--testcallbackfile': | 1011 elif o == '--testcallbackfile': |
| 998 # File path of a pickled class that implements ProgressCallback.call. | 1012 # File path of a pickled class that implements ProgressCallback.call. |
| 999 # Used for testing transfer interruptions and resumes. | 1013 # Used for testing transfer interruptions and resumes. |
| 1000 test_callback_file = a | 1014 test_callback_file = a |
| (...skipping 16 matching lines...) Expand all Loading... |
| 1017 self.recursion_requested = True | 1031 self.recursion_requested = True |
| 1018 elif o == '-U': | 1032 elif o == '-U': |
| 1019 self.skip_unsupported_objects = True | 1033 self.skip_unsupported_objects = True |
| 1020 elif o == '-v': | 1034 elif o == '-v': |
| 1021 print_ver = True | 1035 print_ver = True |
| 1022 elif o == '-z': | 1036 elif o == '-z': |
| 1023 self.gzip_exts = [x.strip() for x in a.split(',')] | 1037 self.gzip_exts = [x.strip() for x in a.split(',')] |
| 1024 if preserve_acl and canned_acl: | 1038 if preserve_acl and canned_acl: |
| 1025 raise CommandException( | 1039 raise CommandException( |
| 1026 'Specifying both the -p and -a options together is invalid.') | 1040 'Specifying both the -p and -a options together is invalid.') |
| 1041 if self.all_versions and self.parallel_operations: |
| 1042 raise CommandException( |
| 1043 'The gsutil -m option is not supported with the cp -A flag, to ' |
| 1044 'ensure that object version ordering is preserved. Please re-run ' |
| 1045 'the command without the -m option.') |
| 1027 return CreateCopyHelperOpts( | 1046 return CreateCopyHelperOpts( |
| 1028 perform_mv=perform_mv, | 1047 perform_mv=perform_mv, |
| 1029 no_clobber=no_clobber, | 1048 no_clobber=no_clobber, |
| 1030 daisy_chain=daisy_chain, | 1049 daisy_chain=daisy_chain, |
| 1031 read_args_from_stdin=read_args_from_stdin, | 1050 read_args_from_stdin=read_args_from_stdin, |
| 1032 print_ver=print_ver, | 1051 print_ver=print_ver, |
| 1033 use_manifest=use_manifest, | 1052 use_manifest=use_manifest, |
| 1034 preserve_acl=preserve_acl, | 1053 preserve_acl=preserve_acl, |
| 1035 canned_acl=canned_acl, | 1054 canned_acl=canned_acl, |
| 1036 skip_unsupported_objects=self.skip_unsupported_objects, | 1055 skip_unsupported_objects=self.skip_unsupported_objects, |
| 1037 test_callback_file=test_callback_file) | 1056 test_callback_file=test_callback_file) |
| 1038 | |
| 1039 def _GetBucketWithVersioningConfig(self, exp_dst_url): | |
| 1040 """Gets versioning config for a bucket and ensures that it exists. | |
| 1041 | |
| 1042 Args: | |
| 1043 exp_dst_url: Wildcard-expanded destination StorageUrl. | |
| 1044 | |
| 1045 Raises: | |
| 1046 AccessDeniedException: if there was a permissions problem accessing the | |
| 1047 bucket or its versioning config. | |
| 1048 CommandException: if URL refers to a cloud bucket that does not exist. | |
| 1049 | |
| 1050 Returns: | |
| 1051 apitools Bucket with versioning configuration. | |
| 1052 """ | |
| 1053 bucket = None | |
| 1054 if exp_dst_url.IsCloudUrl() and exp_dst_url.IsBucket(): | |
| 1055 try: | |
| 1056 bucket = self.gsutil_api.GetBucket( | |
| 1057 exp_dst_url.bucket_name, provider=exp_dst_url.scheme, | |
| 1058 fields=['versioning']) | |
| 1059 except AccessDeniedException, e: | |
| 1060 raise | |
| 1061 except NotFoundException, e: | |
| 1062 raise CommandException('Destination bucket %s does not exist.' % | |
| 1063 exp_dst_url) | |
| 1064 except Exception, e: | |
| 1065 raise CommandException('Error retrieving destination bucket %s: %s' % | |
| 1066 (exp_dst_url, e.message)) | |
| 1067 return bucket | |
| OLD | NEW |