OLD | NEW |
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 # Copyright 2011 Google Inc. All Rights Reserved. | 2 # Copyright 2011 Google Inc. All Rights Reserved. |
3 # Copyright 2011, Nexenta Systems Inc. | 3 # Copyright 2011, Nexenta Systems Inc. |
4 # | 4 # |
5 # Licensed under the Apache License, Version 2.0 (the "License"); | 5 # Licensed under the Apache License, Version 2.0 (the "License"); |
6 # you may not use this file except in compliance with the License. | 6 # you may not use this file except in compliance with the License. |
7 # You may obtain a copy of the License at | 7 # You may obtain a copy of the License at |
8 # | 8 # |
9 # http://www.apache.org/licenses/LICENSE-2.0 | 9 # http://www.apache.org/licenses/LICENSE-2.0 |
10 # | 10 # |
11 # Unless required by applicable law or agreed to in writing, software | 11 # Unless required by applicable law or agreed to in writing, software |
12 # distributed under the License is distributed on an "AS IS" BASIS, | 12 # distributed under the License is distributed on an "AS IS" BASIS, |
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 # See the License for the specific language governing permissions and | 14 # See the License for the specific language governing permissions and |
15 # limitations under the License. | 15 # limitations under the License. |
16 """Implementation of Unix-like cp command for cloud storage providers.""" | 16 """Implementation of Unix-like cp command for cloud storage providers.""" |
17 | 17 |
18 from __future__ import absolute_import | 18 from __future__ import absolute_import |
19 | 19 |
20 import os | 20 import os |
21 import time | 21 import time |
22 import traceback | 22 import traceback |
23 | 23 |
24 from gslib import copy_helper | 24 from gslib import copy_helper |
25 from gslib.cat_helper import CatHelper | 25 from gslib.cat_helper import CatHelper |
26 from gslib.cloud_api import AccessDeniedException | |
27 from gslib.cloud_api import NotFoundException | |
28 from gslib.command import Command | 26 from gslib.command import Command |
29 from gslib.command_argument import CommandArgument | 27 from gslib.command_argument import CommandArgument |
30 from gslib.commands.compose import MAX_COMPONENT_COUNT | 28 from gslib.commands.compose import MAX_COMPONENT_COUNT |
31 from gslib.copy_helper import CreateCopyHelperOpts | 29 from gslib.copy_helper import CreateCopyHelperOpts |
32 from gslib.copy_helper import ItemExistsError | 30 from gslib.copy_helper import ItemExistsError |
33 from gslib.copy_helper import Manifest | 31 from gslib.copy_helper import Manifest |
34 from gslib.copy_helper import PARALLEL_UPLOAD_TEMP_NAMESPACE | 32 from gslib.copy_helper import PARALLEL_UPLOAD_TEMP_NAMESPACE |
35 from gslib.copy_helper import SkipUnsupportedObjectError | 33 from gslib.copy_helper import SkipUnsupportedObjectError |
36 from gslib.cs_api_map import ApiSelector | 34 from gslib.cs_api_map import ApiSelector |
37 from gslib.exception import CommandException | 35 from gslib.exception import CommandException |
(...skipping 164 matching lines...)
202 in the cloud, which may take some time. Such operations can be resumed with | 200 in the cloud, which may take some time. Such operations can be resumed with |
203 the same command if they are interrupted, so long as the command parameters | 201 the same command if they are interrupted, so long as the command parameters |
204 are identical. | 202 are identical. |
205 | 203 |
206 Note that by default, the gsutil cp command does not copy the object | 204 Note that by default, the gsutil cp command does not copy the object |
207 ACL to the new object, and instead will use the default bucket ACL (see | 205 ACL to the new object, and instead will use the default bucket ACL (see |
208 "gsutil help defacl"). You can override this behavior with the -p | 206 "gsutil help defacl"). You can override this behavior with the -p |
209 option (see OPTIONS below). | 207 option (see OPTIONS below). |
210 | 208 |
211 One additional note about copying in the cloud: If the destination bucket has | 209 One additional note about copying in the cloud: If the destination bucket has |
212 versioning enabled, gsutil cp will copy all versions of the source object(s). | 210 versioning enabled, gsutil cp will by default copy only live versions of the |
213 For example: | 211 source object(s). For example: |
214 | 212 |
215 gsutil cp gs://bucket1/obj gs://bucket2 | 213 gsutil cp gs://bucket1/obj gs://bucket2 |
216 | 214 |
217 will cause all versions of gs://bucket1/obj to be copied to gs://bucket2. | 215 will cause only the single live version of gs://bucket1/obj to be copied |
| 216 to gs://bucket2, even if there are archived versions of gs://bucket1/obj. To |
| 217 also copy archived versions, use the -A flag: |
| 218 |
| 219 gsutil cp -A gs://bucket1/obj gs://bucket2 |
| 220 |
| 221 The gsutil -m flag is disallowed when using the cp -A flag, to ensure that |
| 222 version ordering is preserved. |
218 """ | 223 """ |
219 | 224 |
220 _CHECKSUM_VALIDATION_TEXT = """ | 225 _CHECKSUM_VALIDATION_TEXT = """ |
221 <B>CHECKSUM VALIDATION</B> | 226 <B>CHECKSUM VALIDATION</B> |
222 At the end of every upload or download the gsutil cp command validates that | 227 At the end of every upload or download the gsutil cp command validates that |
223 the checksum it computes for the source file/object matches the checksum | 228 the checksum it computes for the source file/object matches the checksum |
224 the service computes. If the checksums do not match, gsutil will delete the | 229 the service computes. If the checksums do not match, gsutil will delete the |
225 corrupted object and print a warning message. This very rarely happens, but | 230 corrupted object and print a warning message. This very rarely happens, but |
226 if it does, please contact gs-team@google.com. | 231 if it does, please contact gs-team@google.com. |
227 | 232 |
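As an illustration of the validation described above (a minimal editorial sketch, not gsutil's internal code), a client can reproduce the check by comparing a local file's MD5 against the base64-encoded md5Hash that the GCS JSON API reports for the object; the file path and reported_md5 below are placeholders:

    import base64
    import hashlib

    def local_md5_b64(path, chunk_size=8192):
        # Stream the file so arbitrarily large objects can be checked.
        md5 = hashlib.md5()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(chunk_size), b''):
                md5.update(chunk)
        # GCS metadata reports MD5 as a base64-encoded binary digest.
        return base64.b64encode(md5.digest()).decode('ascii')

    # reported_md5 would come from the object's 'md5Hash' metadata field:
    # assert local_md5_b64('local-file') == reported_md5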
(...skipping 65 matching lines...)
293 MiB. You do not need to specify any special command line options to make this | 298 MiB. You do not need to specify any special command line options to make this |
294 happen. If your upload is interrupted you can restart the upload by running | 299 happen. If your upload is interrupted you can restart the upload by running |
295 the same cp command that you ran to start the upload. Until the upload | 300 the same cp command that you ran to start the upload. Until the upload |
296 has completed successfully, it will not be visible at the destination object | 301 has completed successfully, it will not be visible at the destination object |
297 and will not replace any existing object the upload is intended to overwrite. | 302 and will not replace any existing object the upload is intended to overwrite. |
298 (However, see the section on PARALLEL COMPOSITE UPLOADS, which may leave | 303 (However, see the section on PARALLEL COMPOSITE UPLOADS, which may leave |
299 temporary component objects in place during the upload process.) | 304 temporary component objects in place during the upload process.) |
300 | 305 |
301 Similarly, gsutil automatically performs resumable downloads (using HTTP | 306 Similarly, gsutil automatically performs resumable downloads (using HTTP |
302 standard Range GET operations) whenever you use the cp command, unless the | 307 standard Range GET operations) whenever you use the cp command, unless the |
303 destination is a stream or null. In this case the partially downloaded file | 308 destination is a stream or null. During a resumable download, a partially |
304 will be visible as soon as it starts being written. Thus, before you attempt | 309 downloaded temporary file will be visible in the destination directory. Upon |
305 to use any files downloaded by gsutil you should make sure the download | 310 completion, the original file is deleted and replaced with the downloaded contents. |
306 completed successfully, by checking the exit status from the gsutil command. | |
307 This can be done in a bash script, for example, by doing: | |
308 | 311 |
309 gsutil cp gs://your-bucket/your-object ./local-file | 312 Resumable uploads and downloads store some state information in a file |
310 if [ "$status" -ne "0" ] ; then | |
311 << Code that handles failures >> | |
312 fi | |
313 | |
314 Resumable uploads and downloads store some state information in a file | |
315 in ~/.gsutil named by the destination object or file. If you attempt to | 313 in ~/.gsutil named by the destination object or file. If you attempt to |
316 resume a transfer from a machine with a different directory, the transfer | 314 resume a transfer from a machine with a different directory, the transfer |
317 will start over from scratch. | 315 will start over from scratch. |
318 | 316 |
319 See also "gsutil help prod" for details on using resumable transfers | 317 See also "gsutil help prod" for details on using resumable transfers |
320 in production. | 318 in production. |
321 """ | 319 """ |
322 | 320 |
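A minimal sketch of the Range GET resumption mechanism mentioned above, assuming the third-party requests library and a placeholder URL; a real client (like gsutil) additionally tracks transfer state and validates checksums:

    import os
    import requests  # third-party; assumed available

    def resume_download(url, dest_path):
        # Ask the server for the byte range starting at however much a
        # previous attempt already wrote to dest_path.
        offset = os.path.getsize(dest_path) if os.path.exists(dest_path) else 0
        headers = {'Range': 'bytes=%d-' % offset} if offset else {}
        resp = requests.get(url, headers=headers, stream=True)
        resp.raise_for_status()
        # 206 Partial Content means the server honored the range request;
        # otherwise start the file over from scratch.
        mode = 'ab' if resp.status_code == 206 else 'wb'
        with open(dest_path, mode) as f:
            for chunk in resp.iter_content(chunk_size=64 * 1024):
                f.write(chunk)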
323 _STREAMING_TRANSFERS_TEXT = """ | 321 _STREAMING_TRANSFERS_TEXT = """ |
324 <B>STREAMING TRANSFERS</B> | 322 <B>STREAMING TRANSFERS</B> |
(...skipping 10 matching lines...)
335 (say, more than 100 MiB) it is recommended to write the data to a local file | 333 (say, more than 100 MiB) it is recommended to write the data to a local file |
336 and then copy that file to the cloud rather than streaming it (and similarly | 334 and then copy that file to the cloud rather than streaming it (and similarly |
337 for large downloads). | 335 for large downloads). |
338 | 336 |
339 WARNING: When performing streaming transfers gsutil does not compute a | 337 WARNING: When performing streaming transfers gsutil does not compute a |
340 checksum of the uploaded or downloaded data. Therefore, we recommend that | 338 checksum of the uploaded or downloaded data. Therefore, we recommend that |
341 users either perform their own validation of the data or use non-streaming | 339 users either perform their own validation of the data or use non-streaming |
342 transfers (which perform integrity checking automatically). | 340 transfers (which perform integrity checking automatically). |
343 """ | 341 """ |
344 | 342 |
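For a concrete streaming example (bucket and program names are placeholders), gsutil accepts '-' in place of a source or destination file to read from stdin or write to stdout:

    some_program | gsutil cp - gs://your-bucket/streamed-object
    gsutil cp gs://your-bucket/streamed-object - | some_program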
| 343 _SLICED_OBJECT_DOWNLOADS_TEXT = """ |
| 344 <B>SLICED OBJECT DOWNLOADS</B> |
| 345 gsutil automatically uses HTTP Range GET requests to perform "sliced" |
| 346 downloads in parallel for downloads of large objects. This means that, if |
| 347 enabled, disk space for the temporary download destination file will be |
| 348 pre-allocated and byte ranges (slices) within the file will be downloaded in |
| 349 parallel. Once all slices have completed downloading, the temporary file will |
| 350 be renamed to the destination file. No additional local disk space is |
| 351 required for this operation. |
| 352 |
| 353 This feature is only available for Google Cloud Storage objects because it |
| 354 requires a fast composable checksum that can be used to verify the data |
| 355 integrity of the slices. Thus, using sliced object downloads also requires a |
| 356 compiled crcmod (see "gsutil help crcmod") on the machine performing the |
| 357 download. If compiled crcmod is not available, a normal download will be |
| 358 used instead. |
| 359 |
| 360 Note: since sliced object downloads cause multiple writes to occur at various |
| 361 locations on disk, this can degrade performance for disks with slow seek |
| 362 times, especially for large numbers of slices. While the default number of |
| 363 slices is small to avoid this, sliced object download can be completely |
| 364 disabled by setting the "sliced_object_download_threshold" variable in the |
| 365 .boto config file to 0. |
| 366 """ |
| 367 |
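Disabling sliced downloads as described above is a .boto configuration edit along these lines:

    [GSUtil]
    sliced_object_download_threshold = 0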
345 _PARALLEL_COMPOSITE_UPLOADS_TEXT = """ | 368 _PARALLEL_COMPOSITE_UPLOADS_TEXT = """ |
346 <B>PARALLEL COMPOSITE UPLOADS</B> | 369 <B>PARALLEL COMPOSITE UPLOADS</B> |
347 gsutil can automatically use | 370 gsutil can automatically use |
348 `object composition <https://developers.google.com/storage/docs/composite-objects>`_ | 371 `object composition <https://developers.google.com/storage/docs/composite-objects>`_ |
349 to perform uploads in parallel for large, local files being uploaded to Google | 372 to perform uploads in parallel for large, local files being uploaded to Google |
350 Cloud Storage. This means that, if enabled (see next paragraph), a large file | 373 Cloud Storage. This means that, if enabled (see next paragraph), a large file |
351 will be split into component pieces that will be uploaded in parallel. Those | 374 will be split into component pieces that will be uploaded in parallel. Those |
352 components will then be composed in the cloud, and the temporary components in | 375 components will then be composed in the cloud, and the temporary components in |
353 the cloud will be deleted after successful composition. No additional local | 376 the cloud will be deleted after successful composition. No additional local |
354 disk space is required for this operation. | 377 disk space is required for this operation. |
355 | 378 |
356 Using parallel composite uploads presents a tradeoff between upload | 379 Using parallel composite uploads presents a tradeoff between upload |
357 performance and download configuration: If you enable parallel composite | 380 performance and download configuration: If you enable parallel composite |
358 uploads your uploads will run faster, but someone will need to install a | 381 uploads your uploads will run faster, but someone will need to install a |
359 compiled crcmod (see "gsutil help crcmod") on every machine where objects are | 382 compiled crcmod (see "gsutil help crcmod") on every machine where objects are |
360 downloaded by gsutil or other Python applications. For some distributions this | 383 downloaded by gsutil or other Python applications. For some distributions this |
361 is easy (e.g., it comes pre-installed on MacOS), but in some cases users have | 384 is easy (e.g., it comes pre-installed on MacOS), but in some cases users have |
362 found it difficult. Because of this at present parallel composite uploads are | 385 found it difficult. Because of this at present parallel composite uploads are |
363 disabled by default. Google is actively working with a number of the Linux | 386 disabled by default. Google is actively working with a number of the Linux |
364 distributions to get crcmod included with the stock distribution. Once that is | 387 distributions to get crcmod included with the stock distribution. Once that is |
365 done we will re-enable parallel composite uploads by default in gsutil. | 388 done we will re-enable parallel composite uploads by default in gsutil. |
366 | 389 |
| 390 Parallel composite uploads should not be used with NEARLINE storage |
| 391 class buckets, as doing this would incur an early deletion charge for each |
| 392 component object. |
| 393 |
367 To try parallel composite uploads you can run the command: | 394 To try parallel composite uploads you can run the command: |
368 | 395 |
369 gsutil -o GSUtil:parallel_composite_upload_threshold=150M cp bigfile gs://your-bucket | 396 gsutil -o GSUtil:parallel_composite_upload_threshold=150M cp bigfile gs://your-bucket |
370 | 397 |
371 where bigfile is larger than 150 MiB. When you do this notice that the upload | 398 where bigfile is larger than 150 MiB. When you do this notice that the upload |
372 progress indicator continuously updates for several different uploads at once | 399 progress indicator continuously updates for several different uploads at once |
373 (corresponding to each of the sections of the file being uploaded in | 400 (corresponding to each of the sections of the file being uploaded in |
374 parallel), until the parallel upload completes. If you then want to enable | 401 parallel), until the parallel upload completes. If you then want to enable |
375 parallel composite uploads for all of your future uploads (notwithstanding the | 402 parallel composite uploads for all of your future uploads (notwithstanding the |
376 caveats mentioned earlier), you can uncomment and set the | 403 caveats mentioned earlier), you can uncomment and set the |
(...skipping 80 matching lines...)
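To make the parallel_composite_upload_threshold override shown earlier permanent rather than a one-off -o flag, the same variable can be set in the .boto configuration file, roughly:

    [GSUtil]
    parallel_composite_upload_threshold = 150M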
457 export TMPDIR=/some/directory | 484 export TMPDIR=/some/directory |
458 | 485 |
459 On Windows 7 you can change the TMPDIR environment variable from Start -> | 486 On Windows 7 you can change the TMPDIR environment variable from Start -> |
460 Computer -> System -> Advanced System Settings -> Environment Variables. | 487 Computer -> System -> Advanced System Settings -> Environment Variables. |
461 You need to reboot after making this change for it to take effect. (Rebooting | 488 You need to reboot after making this change for it to take effect. (Rebooting |
462 is not necessary after running the export command on Linux and MacOS.) | 489 is not necessary after running the export command on Linux and MacOS.) |
463 """ | 490 """ |
464 | 491 |
465 _OPTIONS_TEXT = """ | 492 _OPTIONS_TEXT = """ |
466 <B>OPTIONS</B> | 493 <B>OPTIONS</B> |
467 -a canned_acl Applies the named canned ACL to uploaded objects. See | 494 -a canned_acl Applies the named canned ACL to uploaded objects. See |
468 'gsutil help acls' for further details. | 495 'gsutil help acls' for further details. |
| 496 |
| 497 -A Copy all source versions from source buckets or folders. |
| 498 If not set, only the live version of each source object is |
| 499 copied. Note: this option is only useful when the destination |
| 500 bucket has versioning enabled. |
469 | 501 |
470 -c If an error occurs, continue to attempt to copy the remaining | 502 -c If an error occurs, continue to attempt to copy the remaining |
471 files. If any copies were unsuccessful, gsutil's exit status | 503 files. If any copies were unsuccessful, gsutil's exit status |
472 will be non-zero even if this flag is set. This option is | 504 will be non-zero even if this flag is set. This option is |
473 implicitly set when running "gsutil -m cp...". Note: -c only | 505 implicitly set when running "gsutil -m cp...". Note: -c only |
474 applies to the actual copying operation. If an error occurs | 506 applies to the actual copying operation. If an error occurs |
475 while iterating over the files in the local directory (e.g., | 507 while iterating over the files in the local directory (e.g., |
476 invalid Unicode file name) gsutil will print an error message | 508 invalid Unicode file name) gsutil will print an error message |
477 and abort. | 509 and abort. |
478 | 510 |
(...skipping 87 matching lines...)
566 together. | 598 together. |
567 | 599 |
568 -R, -r Causes directories, buckets, and bucket subdirectories to be | 600 -R, -r Causes directories, buckets, and bucket subdirectories to be |
569 copied recursively. If you neglect to use this option for | 601 copied recursively. If you neglect to use this option for |
570 an upload, gsutil will copy any files it finds and skip any | 602 an upload, gsutil will copy any files it finds and skip any |
571 directories. Similarly, neglecting to specify -r for a download | 603 directories. Similarly, neglecting to specify -r for a download |
572 will cause gsutil to copy any objects at the current bucket | 604 will cause gsutil to copy any objects at the current bucket |
573 directory level, and skip any subdirectories. | 605 directory level, and skip any subdirectories. |
574 | 606 |
575 -U Skip objects with unsupported object types instead of failing. | 607 -U Skip objects with unsupported object types instead of failing. |
576 Unsupported object types are s3 glacier objects. | 608 Unsupported object types are Amazon S3 objects in the GLACIER |
| 609 storage class. |
577 | 610 |
578 -v Requests that the version-specific URL for each uploaded object | 611 -v Requests that the version-specific URL for each uploaded object |
579 be printed. Given this URL you can make future upload requests | 612 be printed. Given this URL you can make future upload requests |
580 that are safe in the face of concurrent updates, because Google | 613 that are safe in the face of concurrent updates, because Google |
581 Cloud Storage will refuse to perform the update if the current | 614 Cloud Storage will refuse to perform the update if the current |
582 object version doesn't match the version-specific URL. See | 615 object version doesn't match the version-specific URL. See |
583 'gsutil help versions' for more details. | 616 'gsutil help versions' for more details. |
584 | 617 |
585 -z <ext,...> Applies gzip content-encoding to file uploads with the given | 618 -z <ext,...> Applies gzip content-encoding to file uploads with the given |
586 extensions. This is useful when uploading files with | 619 extensions. This is useful when uploading files with |
(...skipping 32 matching lines...)
619 | 652 |
620 _DETAILED_HELP_TEXT = '\n\n'.join([_SYNOPSIS_TEXT, | 653 _DETAILED_HELP_TEXT = '\n\n'.join([_SYNOPSIS_TEXT, |
621 _DESCRIPTION_TEXT, | 654 _DESCRIPTION_TEXT, |
622 _NAME_CONSTRUCTION_TEXT, | 655 _NAME_CONSTRUCTION_TEXT, |
623 _SUBDIRECTORIES_TEXT, | 656 _SUBDIRECTORIES_TEXT, |
624 _COPY_IN_CLOUD_TEXT, | 657 _COPY_IN_CLOUD_TEXT, |
625 _CHECKSUM_VALIDATION_TEXT, | 658 _CHECKSUM_VALIDATION_TEXT, |
626 _RETRY_HANDLING_TEXT, | 659 _RETRY_HANDLING_TEXT, |
627 _RESUMABLE_TRANSFERS_TEXT, | 660 _RESUMABLE_TRANSFERS_TEXT, |
628 _STREAMING_TRANSFERS_TEXT, | 661 _STREAMING_TRANSFERS_TEXT, |
| 662 _SLICED_OBJECT_DOWNLOADS_TEXT, |
629 _PARALLEL_COMPOSITE_UPLOADS_TEXT, | 663 _PARALLEL_COMPOSITE_UPLOADS_TEXT, |
630 _CHANGING_TEMP_DIRECTORIES_TEXT, | 664 _CHANGING_TEMP_DIRECTORIES_TEXT, |
631 _OPTIONS_TEXT]) | 665 _OPTIONS_TEXT]) |
632 | 666 |
633 | 667 |
634 CP_SUB_ARGS = 'a:cDeIL:MNnprRtUvz:' | 668 CP_SUB_ARGS = 'a:AcDeIL:MNnprRtUvz:' |
635 | 669 |
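In the getopt option string above, a trailing ':' marks a flag that takes a value; the newly added 'A' has none, so -A is a plain boolean switch. A quick illustration:

    import getopt

    # '-a' consumes an argument; '-A' does not.
    opts, args = getopt.getopt(['-a', 'public-read', '-A', 'src', 'dst'],
                               'a:AcDeIL:MNnprRtUvz:')
    # opts == [('-a', 'public-read'), ('-A', '')]; args == ['src', 'dst']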
636 | 670 |
637 def _CopyFuncWrapper(cls, args, thread_state=None): | 671 def _CopyFuncWrapper(cls, args, thread_state=None): |
638 cls.CopyFunc(args, thread_state=thread_state) | 672 cls.CopyFunc(args, thread_state=thread_state) |
639 | 673 |
640 | 674 |
641 def _CopyExceptionHandler(cls, e): | 675 def _CopyExceptionHandler(cls, e): |
642 """Simple exception handler to allow post-completion status.""" | 676 """Simple exception handler to allow post-completion status.""" |
643 cls.logger.error(str(e)) | 677 cls.logger.error(str(e)) |
644 cls.op_failure_count += 1 | 678 cls.op_failure_count += 1 |
(...skipping 80 matching lines...)
725 | 759 |
726 # Various GUI tools (like the GCS web console) create placeholder objects | 760 # Various GUI tools (like the GCS web console) create placeholder objects |
727 # ending with '/' when the user creates an empty directory. Normally these | 761 # ending with '/' when the user creates an empty directory. Normally these |
728 # tools should delete those placeholders once objects have been written | 762 # tools should delete those placeholders once objects have been written |
729 # "under" the directory, but sometimes the placeholders are left around. We | 763 # "under" the directory, but sometimes the placeholders are left around. We |
730 # need to filter them out here, otherwise if the user tries to rsync from | 764 # need to filter them out here, otherwise if the user tries to rsync from |
731 # GCS to a local directory it will result in a directory/file conflict | 765 # GCS to a local directory it will result in a directory/file conflict |
732 # (e.g., trying to download an object called "mydata/" where the local | 766 # (e.g., trying to download an object called "mydata/" where the local |
733 # directory "mydata" exists). | 767 # directory "mydata" exists). |
734 if IsCloudSubdirPlaceholder(exp_src_url): | 768 if IsCloudSubdirPlaceholder(exp_src_url): |
735 self.logger.info('Skipping cloud sub-directory placeholder object (%s) ' | 769 # We used to output the message 'Skipping cloud sub-directory placeholder |
736 'because such objects aren\'t needed in (and would ' | 770 # object...' but we no longer do so because it caused customer confusion. |
737 'interfere with) directories in the local file system', | |
738 exp_src_url) | |
739 return | 771 return |
740 | 772 |
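For context, the placeholder test referenced above behaves roughly like the following sketch (illustrative; not necessarily the exact IsCloudSubdirPlaceholder implementation):

    def _is_subdir_placeholder(object_name):
        # GUI tools simulate empty directories with a trailing '/' or the
        # legacy '_$folder$' suffix; neither corresponds to a real file.
        return object_name.endswith('/') or object_name.endswith('_$folder$')

    # _is_subdir_placeholder('mydata/')          -> True
    # _is_subdir_placeholder('mydata_$folder$')  -> True
    # _is_subdir_placeholder('mydata/file.txt')  -> False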
741 if copy_helper_opts.use_manifest and self.manifest.WasSuccessful( | 773 if copy_helper_opts.use_manifest and self.manifest.WasSuccessful( |
742 exp_src_url.url_string): | 774 exp_src_url.url_string): |
743 return | 775 return |
744 | 776 |
745 if copy_helper_opts.perform_mv: | 777 if copy_helper_opts.perform_mv: |
746 if name_expansion_result.names_container: | 778 if name_expansion_result.names_container: |
747 # Use recursion_requested when performing name expansion for the | 779 # Use recursion_requested when performing name expansion for the |
748 # directory mv case so we can determine if any of the source URLs are | 780 # directory mv case so we can determine if any of the source URLs are |
(...skipping 33 matching lines...)
782 elapsed_time = bytes_transferred = 0 | 814 elapsed_time = bytes_transferred = 0 |
783 try: | 815 try: |
784 if copy_helper_opts.use_manifest: | 816 if copy_helper_opts.use_manifest: |
785 self.manifest.Initialize( | 817 self.manifest.Initialize( |
786 exp_src_url.url_string, dst_url.url_string) | 818 exp_src_url.url_string, dst_url.url_string) |
787 (elapsed_time, bytes_transferred, result_url, md5) = ( | 819 (elapsed_time, bytes_transferred, result_url, md5) = ( |
788 copy_helper.PerformCopy( | 820 copy_helper.PerformCopy( |
789 self.logger, exp_src_url, dst_url, gsutil_api, | 821 self.logger, exp_src_url, dst_url, gsutil_api, |
790 self, _CopyExceptionHandler, allow_splitting=True, | 822 self, _CopyExceptionHandler, allow_splitting=True, |
791 headers=self.headers, manifest=self.manifest, | 823 headers=self.headers, manifest=self.manifest, |
792 gzip_exts=self.gzip_exts, test_method=self.test_method)) | 824 gzip_exts=self.gzip_exts)) |
793 if copy_helper_opts.use_manifest: | 825 if copy_helper_opts.use_manifest: |
794 if md5: | 826 if md5: |
795 self.manifest.Set(exp_src_url.url_string, 'md5', md5) | 827 self.manifest.Set(exp_src_url.url_string, 'md5', md5) |
796 self.manifest.SetResult( | 828 self.manifest.SetResult( |
797 exp_src_url.url_string, bytes_transferred, 'OK') | 829 exp_src_url.url_string, bytes_transferred, 'OK') |
798 if copy_helper_opts.print_ver: | 830 if copy_helper_opts.print_ver: |
799 # Some cases don't return a version-specific URL (e.g., if destination | 831 # Some cases don't return a version-specific URL (e.g., if destination |
800 # is a file). | 832 # is a file). |
801 self.logger.info('Created: %s', result_url) | 833 self.logger.info('Created: %s', result_url) |
802 except ItemExistsError: | 834 except ItemExistsError: |
(...skipping 62 matching lines...)
865 url_strs = StdinIterator() | 897 url_strs = StdinIterator() |
866 else: | 898 else: |
867 if len(self.args) < 2: | 899 if len(self.args) < 2: |
868 raise CommandException('Wrong number of arguments for "cp" command.') | 900 raise CommandException('Wrong number of arguments for "cp" command.') |
869 url_strs = self.args[:-1] | 901 url_strs = self.args[:-1] |
870 | 902 |
871 (self.exp_dst_url, self.have_existing_dst_container) = ( | 903 (self.exp_dst_url, self.have_existing_dst_container) = ( |
872 copy_helper.ExpandUrlToSingleBlr(self.args[-1], self.gsutil_api, | 904 copy_helper.ExpandUrlToSingleBlr(self.args[-1], self.gsutil_api, |
873 self.debug, self.project_id)) | 905 self.debug, self.project_id)) |
874 | 906 |
875 # If the destination bucket has versioning enabled iterate with | |
876 # all_versions=True. That way we'll copy all versions if the source bucket | |
877 # is versioned; and by leaving all_versions=False if the destination bucket | |
878 # has versioning disabled we will avoid copying old versions all to the same | |
879 # un-versioned destination object. | |
880 all_versions = False | |
881 try: | |
882 bucket = self._GetBucketWithVersioningConfig(self.exp_dst_url) | |
883 if bucket and bucket.versioning and bucket.versioning.enabled: | |
884 all_versions = True | |
885 except AccessDeniedException: | |
886 # This happens (in the XML API only) if the user doesn't have OWNER access | |
887 # on the bucket (needed to check if versioning is enabled). In this case | |
888 # fall back to copying all versions (which can be inefficient for the | |
889 # reason noted in the comment above). We don't try to warn the user | |
890 # because that would result in false positive warnings (since we can't | |
891 # check if versioning is enabled on the destination bucket). | |
892 # | |
893 # For JSON, we will silently not return versioning if we don't have | |
894 # access. | |
895 all_versions = True | |
896 | |
897 name_expansion_iterator = NameExpansionIterator( | 907 name_expansion_iterator = NameExpansionIterator( |
898 self.command_name, self.debug, | 908 self.command_name, self.debug, |
899 self.logger, self.gsutil_api, url_strs, | 909 self.logger, self.gsutil_api, url_strs, |
900 self.recursion_requested or copy_helper_opts.perform_mv, | 910 self.recursion_requested or copy_helper_opts.perform_mv, |
901 project_id=self.project_id, all_versions=all_versions, | 911 project_id=self.project_id, all_versions=self.all_versions, |
902 continue_on_error=self.continue_on_error or self.parallel_operations) | 912 continue_on_error=self.continue_on_error or self.parallel_operations) |
903 | 913 |
904 # Use a lock to ensure accurate statistics in the face of | 914 # Use a lock to ensure accurate statistics in the face of |
905 # multi-threading/multi-processing. | 915 # multi-threading/multi-processing. |
906 self.stats_lock = CreateLock() | 916 self.stats_lock = CreateLock() |
907 | 917 |
908 # Tracks if any copies failed. | 918 # Tracks if any copies failed. |
909 self.op_failure_count = 0 | 919 self.op_failure_count = 0 |
910 | 920 |
911 # Start the clock. | 921 # Start the clock. |
(...skipping 29 matching lines...)
941 if self.debug == 3: | 951 if self.debug == 3: |
942 # Note that this only counts the actual GET and PUT bytes for the copy | 952 # Note that this only counts the actual GET and PUT bytes for the copy |
943 # - not any transfers for doing wildcard expansion, the initial | 953 # - not any transfers for doing wildcard expansion, the initial |
944 # HEAD/GET request performed to get the object metadata, etc. | 954 # HEAD/GET request performed to get the object metadata, etc. |
945 if self.total_bytes_transferred != 0: | 955 if self.total_bytes_transferred != 0: |
946 self.logger.info( | 956 self.logger.info( |
947 'Total bytes copied=%d, total elapsed time=%5.3f secs (%sps)', | 957 'Total bytes copied=%d, total elapsed time=%5.3f secs (%sps)', |
948 self.total_bytes_transferred, self.total_elapsed_time, | 958 self.total_bytes_transferred, self.total_elapsed_time, |
949 MakeHumanReadable(self.total_bytes_per_second)) | 959 MakeHumanReadable(self.total_bytes_per_second)) |
950 if self.op_failure_count: | 960 if self.op_failure_count: |
951 plural_str = 's' if self.op_failure_count else '' | 961 plural_str = 's' if self.op_failure_count > 1 else '' |
952 raise CommandException('%d file%s/object%s could not be transferred.' % ( | 962 raise CommandException('%d file%s/object%s could not be transferred.' % ( |
953 self.op_failure_count, plural_str, plural_str)) | 963 self.op_failure_count, plural_str, plural_str)) |
954 | 964 |
955 return 0 | 965 return 0 |
956 | 966 |
957 def _ParseOpts(self): | 967 def _ParseOpts(self): |
958 perform_mv = False | 968 perform_mv = False |
959 # exclude_symlinks is handled by Command parent class, so save in Command | 969 # exclude_symlinks is handled by Command parent class, so save in Command |
960 # state rather than CopyHelperOpts. | 970 # state rather than CopyHelperOpts. |
961 self.exclude_symlinks = False | 971 self.exclude_symlinks = False |
962 no_clobber = False | 972 no_clobber = False |
963 # continue_on_error is handled by Command parent class, so save in Command | 973 # continue_on_error is handled by Command parent class, so save in Command |
964 # state rather than CopyHelperOpts. | 974 # state rather than CopyHelperOpts. |
965 self.continue_on_error = False | 975 self.continue_on_error = False |
966 daisy_chain = False | 976 daisy_chain = False |
967 read_args_from_stdin = False | 977 read_args_from_stdin = False |
968 print_ver = False | 978 print_ver = False |
969 use_manifest = False | 979 use_manifest = False |
970 preserve_acl = False | 980 preserve_acl = False |
971 canned_acl = None | 981 canned_acl = None |
972 # canned_acl is handled by a helper function in parent | 982 # canned_acl is handled by a helper function in parent |
973 # Command class, so save in Command state rather than CopyHelperOpts. | 983 # Command class, so save in Command state rather than CopyHelperOpts. |
974 self.canned = None | 984 self.canned = None |
975 | 985 |
| 986 self.all_versions = False |
| 987 |
976 self.skip_unsupported_objects = False | 988 self.skip_unsupported_objects = False |
977 | 989 |
978 # Files matching these extensions should be gzipped before uploading. | 990 # Files matching these extensions should be gzipped before uploading. |
979 self.gzip_exts = [] | 991 self.gzip_exts = [] |
980 | 992 |
981 test_callback_file = None | 993 test_callback_file = None |
982 | 994 |
983 # self.recursion_requested initialized in command.py (so can be checked | 995 # self.recursion_requested initialized in command.py (so can be checked |
984 # in parent class for all commands). | 996 # in parent class for all commands). |
985 self.manifest = None | 997 self.manifest = None |
986 if self.sub_opts: | 998 if self.sub_opts: |
987 for o, a in self.sub_opts: | 999 for o, a in self.sub_opts: |
988 if o == '-a': | 1000 if o == '-a': |
989 canned_acl = a | 1001 canned_acl = a |
990 self.canned = True | 1002 self.canned = True |
| 1003 if o == '-A': |
| 1004 self.all_versions = True |
991 if o == '-c': | 1005 if o == '-c': |
992 self.continue_on_error = True | 1006 self.continue_on_error = True |
993 elif o == '-D': | 1007 elif o == '-D': |
994 daisy_chain = True | 1008 daisy_chain = True |
995 elif o == '-e': | 1009 elif o == '-e': |
996 self.exclude_symlinks = True | 1010 self.exclude_symlinks = True |
997 elif o == '--testcallbackfile': | 1011 elif o == '--testcallbackfile': |
998 # File path of a pickled class that implements ProgressCallback.call. | 1012 # File path of a pickled class that implements ProgressCallback.call. |
999 # Used for testing transfer interruptions and resumes. | 1013 # Used for testing transfer interruptions and resumes. |
1000 test_callback_file = a | 1014 test_callback_file = a |
(...skipping 16 matching lines...)
1017 self.recursion_requested = True | 1031 self.recursion_requested = True |
1018 elif o == '-U': | 1032 elif o == '-U': |
1019 self.skip_unsupported_objects = True | 1033 self.skip_unsupported_objects = True |
1020 elif o == '-v': | 1034 elif o == '-v': |
1021 print_ver = True | 1035 print_ver = True |
1022 elif o == '-z': | 1036 elif o == '-z': |
1023 self.gzip_exts = [x.strip() for x in a.split(',')] | 1037 self.gzip_exts = [x.strip() for x in a.split(',')] |
1024 if preserve_acl and canned_acl: | 1038 if preserve_acl and canned_acl: |
1025 raise CommandException( | 1039 raise CommandException( |
1026 'Specifying both the -p and -a options together is invalid.') | 1040 'Specifying both the -p and -a options together is invalid.') |
| 1041 if self.all_versions and self.parallel_operations: |
| 1042 raise CommandException( |
| 1043 'The gsutil -m option is not supported with the cp -A flag, to ' |
| 1044 'ensure that object version ordering is preserved. Please re-run ' |
| 1045 'the command without the -m option.') |
1027 return CreateCopyHelperOpts( | 1046 return CreateCopyHelperOpts( |
1028 perform_mv=perform_mv, | 1047 perform_mv=perform_mv, |
1029 no_clobber=no_clobber, | 1048 no_clobber=no_clobber, |
1030 daisy_chain=daisy_chain, | 1049 daisy_chain=daisy_chain, |
1031 read_args_from_stdin=read_args_from_stdin, | 1050 read_args_from_stdin=read_args_from_stdin, |
1032 print_ver=print_ver, | 1051 print_ver=print_ver, |
1033 use_manifest=use_manifest, | 1052 use_manifest=use_manifest, |
1034 preserve_acl=preserve_acl, | 1053 preserve_acl=preserve_acl, |
1035 canned_acl=canned_acl, | 1054 canned_acl=canned_acl, |
1036 skip_unsupported_objects=self.skip_unsupported_objects, | 1055 skip_unsupported_objects=self.skip_unsupported_objects, |
1037 test_callback_file=test_callback_file) | 1056 test_callback_file=test_callback_file) |
1038 | |
1039 def _GetBucketWithVersioningConfig(self, exp_dst_url): | |
1040 """Gets versioning config for a bucket and ensures that it exists. | |
1041 | |
1042 Args: | |
1043 exp_dst_url: Wildcard-expanded destination StorageUrl. | |
1044 | |
1045 Raises: | |
1046 AccessDeniedException: if there was a permissions problem accessing the | |
1047 bucket or its versioning config. | |
1048 CommandException: if URL refers to a cloud bucket that does not exist. | |
1049 | |
1050 Returns: | |
1051 apitools Bucket with versioning configuration. | |
1052 """ | |
1053 bucket = None | |
1054 if exp_dst_url.IsCloudUrl() and exp_dst_url.IsBucket(): | |
1055 try: | |
1056 bucket = self.gsutil_api.GetBucket( | |
1057 exp_dst_url.bucket_name, provider=exp_dst_url.scheme, | |
1058 fields=['versioning']) | |
1059 except AccessDeniedException, e: | |
1060 raise | |
1061 except NotFoundException, e: | |
1062 raise CommandException('Destination bucket %s does not exist.' % | |
1063 exp_dst_url) | |
1064 except Exception, e: | |
1065 raise CommandException('Error retrieving destination bucket %s: %s' % | |
1066 (exp_dst_url, e.message)) | |
1067 return bucket | |