| Index: tools/telemetry/third_party/gsutilz/gslib/commands/cp.py
|
| diff --git a/tools/telemetry/third_party/gsutilz/gslib/commands/cp.py b/tools/telemetry/third_party/gsutilz/gslib/commands/cp.py
|
| deleted file mode 100644
|
| index 34636dc47d5df460909c9a9e65b38f139aed9a10..0000000000000000000000000000000000000000
|
| --- a/tools/telemetry/third_party/gsutilz/gslib/commands/cp.py
|
| +++ /dev/null
|
| @@ -1,1067 +0,0 @@
|
| -# -*- coding: utf-8 -*-
|
| -# Copyright 2011 Google Inc. All Rights Reserved.
|
| -# Copyright 2011, Nexenta Systems Inc.
|
| -#
|
| -# Licensed under the Apache License, Version 2.0 (the "License");
|
| -# you may not use this file except in compliance with the License.
|
| -# You may obtain a copy of the License at
|
| -#
|
| -# http://www.apache.org/licenses/LICENSE-2.0
|
| -#
|
| -# Unless required by applicable law or agreed to in writing, software
|
| -# distributed under the License is distributed on an "AS IS" BASIS,
|
| -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| -# See the License for the specific language governing permissions and
|
| -# limitations under the License.
|
| -"""Implementation of Unix-like cp command for cloud storage providers."""
|
| -
|
| -from __future__ import absolute_import
|
| -
|
| -import os
|
| -import time
|
| -import traceback
|
| -
|
| -from gslib import copy_helper
|
| -from gslib.cat_helper import CatHelper
|
| -from gslib.cloud_api import AccessDeniedException
|
| -from gslib.cloud_api import NotFoundException
|
| -from gslib.command import Command
|
| -from gslib.command_argument import CommandArgument
|
| -from gslib.commands.compose import MAX_COMPONENT_COUNT
|
| -from gslib.copy_helper import CreateCopyHelperOpts
|
| -from gslib.copy_helper import ItemExistsError
|
| -from gslib.copy_helper import Manifest
|
| -from gslib.copy_helper import PARALLEL_UPLOAD_TEMP_NAMESPACE
|
| -from gslib.copy_helper import SkipUnsupportedObjectError
|
| -from gslib.cs_api_map import ApiSelector
|
| -from gslib.exception import CommandException
|
| -from gslib.name_expansion import NameExpansionIterator
|
| -from gslib.storage_url import ContainsWildcard
|
| -from gslib.util import CreateLock
|
| -from gslib.util import GetCloudApiInstance
|
| -from gslib.util import IsCloudSubdirPlaceholder
|
| -from gslib.util import MakeHumanReadable
|
| -from gslib.util import NO_MAX
|
| -from gslib.util import RemoveCRLFFromString
|
| -from gslib.util import StdinIterator
|
| -
|
| -_SYNOPSIS = """
|
| - gsutil cp [OPTION]... src_url dst_url
|
| - gsutil cp [OPTION]... src_url... dst_url
|
| - gsutil cp [OPTION]... -I dst_url
|
| -"""
|
| -
|
| -_SYNOPSIS_TEXT = """
|
| -<B>SYNOPSIS</B>
|
| -""" + _SYNOPSIS
|
| -
|
| -_DESCRIPTION_TEXT = """
|
| -<B>DESCRIPTION</B>
|
| - The gsutil cp command allows you to copy data between your local file
|
| - system and the cloud, copy data within the cloud, and copy data between
|
| - cloud storage providers. For example, to copy all text files from the
|
| - local directory to a bucket you could do:
|
| -
|
| - gsutil cp *.txt gs://my_bucket
|
| -
|
| - Similarly, you can download text files from a bucket by doing:
|
| -
|
| - gsutil cp gs://my_bucket/*.txt .
|
| -
|
| - If you want to copy an entire directory tree you need to use the -r option:
|
| -
|
| - gsutil cp -r dir gs://my_bucket
|
| -
|
| - If you have a large number of files to upload you might want to use the
|
| - gsutil -m option, to perform a parallel (multi-threaded/multi-processing)
|
| - copy:
|
| -
|
| - gsutil -m cp -r dir gs://my_bucket
|
| -
|
| - You can pass a list of URLs (one per line) to copy on stdin instead of as
|
| - command line arguments by using the -I option. This allows you to use gsutil
|
| - in a pipeline to upload or download files / objects as generated by a program,
|
| - such as:
|
| -
|
| - some_program | gsutil -m cp -I gs://my_bucket
|
| -
|
| - or:
|
| -
|
| - some_program | gsutil -m cp -I ./download_dir
|
| -
|
| - The contents of stdin can name files, cloud URLs, and wildcards of files
|
| - and cloud URLs.
|
| -"""
|
| -
|
| -_NAME_CONSTRUCTION_TEXT = """
|
| -<B>HOW NAMES ARE CONSTRUCTED</B>
|
| - The gsutil cp command strives to name objects in a way consistent with how
|
| - Linux cp works, which causes names to be constructed in varying ways depending
|
| - on whether you're performing a recursive directory copy or copying
|
| - individually named objects; and whether you're copying to an existing or
|
| - non-existent directory.
|
| -
|
| - When performing recursive directory copies, object names are constructed
|
| - that mirror the source directory structure starting at the point of
|
| - recursive processing. For example, the command:
|
| -
|
| - gsutil cp -r dir1/dir2 gs://my_bucket
|
| -
|
| - will create objects named like gs://my_bucket/dir2/a/b/c, assuming
|
| - dir1/dir2 contains the file a/b/c.
|
| -
|
| - In contrast, copying individually named files will result in objects named
|
| - by the final path component of the source files. For example, the command:
|
| -
|
| - gsutil cp dir1/dir2/** gs://my_bucket
|
| -
|
| - will create objects named like gs://my_bucket/c.
|
| -
|
| - The same rules apply for downloads: recursive copies of buckets and
|
| - bucket subdirectories produce a mirrored filename structure, while copying
|
| - individually (or wildcard) named objects produces flatly named files.
|
| -
|
| - Note that in the above example the '**' wildcard matches all names
|
| - anywhere under dir1/dir2. The wildcard '*' will match names just one level deep. For
|
| - more details see 'gsutil help wildcards'.
|
| -
|
| - There's an additional wrinkle when working with subdirectories: the resulting
|
| - names depend on whether the destination subdirectory exists. For example,
|
| - if gs://my_bucket/subdir exists as a subdirectory, the command:
|
| -
|
| - gsutil cp -r dir1/dir2 gs://my_bucket/subdir
|
| -
|
| - will create objects named like gs://my_bucket/subdir/dir2/a/b/c. In contrast,
|
| - if gs://my_bucket/subdir does not exist, this same gsutil cp command will
|
| - create objects named like gs://my_bucket/subdir/a/b/c.
|
| -
|
| - Note: If you use the
|
| - `Google Developers Console <https://console.developers.google.com>`_
|
| - to create folders, it does so by creating a "placeholder" object that ends
|
| - with a "/" character. gsutil skips these objects when downloading from the
|
| - cloud to the local file system, because attempting to create a file that
|
| - ends with a "/" is not allowed on Linux and MacOS. Because of this, it is
|
| - recommended that you not create objects that end with "/" (unless you don't
|
| - need to be able to download such objects using gsutil).
|
| -"""
|
| -
|
| -_SUBDIRECTORIES_TEXT = """
|
| -<B>COPYING TO/FROM SUBDIRECTORIES; DISTRIBUTING TRANSFERS ACROSS MACHINES</B>
|
| - You can use gsutil to copy to and from subdirectories by using a command
|
| - like:
|
| -
|
| - gsutil cp -r dir gs://my_bucket/data
|
| -
|
| - This will cause dir and all of its files and nested subdirectories to be
|
| - copied under the specified destination, resulting in objects with names like
|
| - gs://my_bucket/data/dir/a/b/c. Similarly you can download from bucket
|
| - subdirectories by using a command like:
|
| -
|
| - gsutil cp -r gs://my_bucket/data dir
|
| -
|
| - This will cause everything nested under gs://my_bucket/data to be downloaded
|
| - into dir, resulting in files with names like dir/data/a/b/c.
|
| -
|
| - Copying subdirectories is useful if you want to add data to an existing
|
| - bucket directory structure over time. It's also useful if you want
|
| - to parallelize uploads and downloads across multiple machines (often
|
| - reducing overall transfer time compared with simply running gsutil -m
|
| - cp on one machine). For example, if your bucket contains this structure:
|
| -
|
| - gs://my_bucket/data/result_set_01/
|
| - gs://my_bucket/data/result_set_02/
|
| - ...
|
| - gs://my_bucket/data/result_set_99/
|
| -
|
| - you could perform concurrent downloads across 3 machines by running these
|
| - commands on each machine, respectively:
|
| -
|
| - gsutil -m cp -r gs://my_bucket/data/result_set_[0-3]* dir
|
| - gsutil -m cp -r gs://my_bucket/data/result_set_[4-6]* dir
|
| - gsutil -m cp -r gs://my_bucket/data/result_set_[7-9]* dir
|
| -
|
| - Note that dir could be a local directory on each machine, or it could
|
| - be a directory mounted off of a shared file server; whether the latter
|
| - performs acceptably may depend on a number of things, so we recommend
|
| - you experiment and find out what works best for you.
|
| -"""
|
| -
|
| -_COPY_IN_CLOUD_TEXT = """
|
| -<B>COPYING IN THE CLOUD AND METADATA PRESERVATION</B>
|
| - If both the source and destination URL are cloud URLs from the same
|
| - provider, gsutil copies data "in the cloud" (i.e., without downloading
|
| - to and uploading from the machine where you run gsutil). In addition to
|
| - the performance and cost advantages of doing this, copying in the cloud
|
| - preserves metadata (like Content-Type and Cache-Control). In contrast,
|
| - when you download data from the cloud it ends up in a file, which has
|
| - no associated metadata. Thus, unless you have some way to hold on to
|
| - or re-create that metadata, downloading to a file will not retain the
|
| - metadata.
|
| -
|
| - Copies spanning locations and/or storage classes cause data to be rewritten
|
| - in the cloud, which may take some time. Such operations can be resumed with
|
| - the same command if they are interrupted, so long as the command parameters
|
| - are identical.
|
| -
|
| - Note that by default, the gsutil cp command does not copy the object
|
| - ACL to the new object, and instead will use the default bucket ACL (see
|
| - "gsutil help defacl"). You can override this behavior with the -p
|
| - option (see OPTIONS below).
|
| -
|
| - One additional note about copying in the cloud: If the destination bucket has
|
| - versioning enabled, gsutil cp will copy all versions of the source object(s).
|
| - For example:
|
| -
|
| - gsutil cp gs://bucket1/obj gs://bucket2
|
| -
|
| - will cause all versions of gs://bucket1/obj to be copied to gs://bucket2.
|
| -"""
|
| -
|
| -_CHECKSUM_VALIDATION_TEXT = """
|
| -<B>CHECKSUM VALIDATION</B>
|
| - At the end of every upload or download the gsutil cp command validates that
|
| - the checksum it computes for the source file/object matches the checksum
|
| - the service computes. If the checksums do not match, gsutil will delete the
|
| - corrupted object and print a warning message. This very rarely happens, but
|
| - if it does, please contact gs-team@google.com.
|
| -
|
| - If you know the MD5 of a file before uploading you can specify it in the
|
| - Content-MD5 header, which will cause the cloud storage service to reject the
|
| - upload if the MD5 doesn't match the value computed by the service. For
|
| - example:
|
| -
|
| - % gsutil hash obj
|
| - Hashing obj:
|
| - Hashes [base64] for obj:
|
| - Hash (crc32c): lIMoIw==
|
| - Hash (md5): VgyllJgiiaRAbyUUIqDMmw==
|
| -
|
| - % gsutil -h Content-MD5:VgyllJgiiaRAbyUUIqDMmw== cp obj gs://your-bucket/obj
|
| - Copying file://obj [Content-Type=text/plain]...
|
| - Uploading gs://your-bucket/obj: 182 b/182 B
|
| -
|
| - If the checksum didn't match the service would instead reject the upload and
|
| - gsutil would print a message like:
|
| -
|
| - BadRequestException: 400 Provided MD5 hash "VgyllJgiiaRAbyUUIqDMmw=="
|
| - doesn't match calculated MD5 hash "7gyllJgiiaRAbyUUIqDMmw==".
|
| -
|
| - Even if you don't do this, gsutil will delete the object if the computed
|
| - checksum doesn't match, but specifying the Content-MD5 header has three
|
| - advantages:
|
| -
|
| - 1. It prevents the corrupted object from becoming visible at all, whereas
|
| - otherwise it would be visible for 1-3 seconds before gsutil deletes it.
|
| -
|
| - 2. It will definitively prevent the corrupted object from being left in
|
| - the cloud, whereas the gsutil approach of deleting after the upload
|
| - completes could fail if (for example) the gsutil process gets ^C'd
|
| - between upload and deletion request.
|
| -
|
| - 3. It supports a customer-to-service integrity check handoff. For example,
|
| - if you have a content production pipeline that generates data to be
|
| - uploaded to the cloud along with checksums of that data, specifying the
|
| - MD5 computed by your content pipeline when you run gsutil cp will ensure
|
| - that the checksums match all the way through the process (e.g., detecting
|
| - if data gets corrupted on your local disk between the time it was written
|
| - by your content pipeline and the time it was uploaded to GCS).
|
| -
|
| - Note: The Content-MD5 header is ignored for composite objects, because such
|
| - objects only have a CRC32C checksum.
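| -
| -  If you want to compute the value to pass in the Content-MD5 header
| -  yourself, a minimal sketch in Python (the helper name here is
| -  illustrative, not a gsutil function) might look like this:
| -
| -      import base64
| -      import hashlib
| -
| -      def content_md5(path, chunk_size=8 * 1024 * 1024):
| -        # Return the base64-encoded MD5 digest, which is the form the
| -        # Content-MD5 header (and the gsutil hash output above) uses.
| -        md5 = hashlib.md5()
| -        with open(path, 'rb') as fp:
| -          for chunk in iter(lambda: fp.read(chunk_size), b''):
| -            md5.update(chunk)
| -        return base64.b64encode(md5.digest())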
|
| -"""
|
| -
|
| -_RETRY_HANDLING_TEXT = """
|
| -<B>RETRY HANDLING</B>
|
| - The cp command will retry when failures occur, but if enough failures happen
|
| - during a particular copy or delete operation the command will skip that object
|
| - and move on. At the end of the copy run if any failures were not successfully
|
| - retried, the cp command will report the count of failures, and exit with
|
| - non-zero status.
|
| -
|
| - Note that there are cases where retrying will never succeed, such as if you
|
| - don't have write permission to the destination bucket or if the destination
|
| - path for some objects is longer than the maximum allowed length.
|
| -
|
| - For more details about gsutil's retry handling, please see
|
| - "gsutil help retries".
|
| -"""
|
| -
|
| -_RESUMABLE_TRANSFERS_TEXT = """
|
| -<B>RESUMABLE TRANSFERS</B>
|
| - gsutil automatically uses the Google Cloud Storage resumable upload feature
|
| - whenever you use the cp command to upload an object that is larger than 2
|
| - MiB. You do not need to specify any special command line options to make this
|
| - happen. If your upload is interrupted you can restart the upload by running
|
| - the same cp command that you ran to start the upload. Until the upload
|
| - has completed successfully, it will not be visible at the destination object
|
| - and will not replace any existing object the upload is intended to overwrite.
|
| - (However, see the section on PARALLEL COMPOSITE UPLOADS, which may leave
|
| - temporary component objects in place during the upload process.)
|
| -
|
| - Similarly, gsutil automatically performs resumable downloads (using HTTP
|
| - standard Range GET operations) whenever you use the cp command, unless the
|
| - destination is a stream or null. In this case the partially downloaded file
|
| - will be visible as soon as it starts being written. Thus, before you attempt
|
| - to use any files downloaded by gsutil you should make sure the download
|
| - completed successfully, by checking the exit status from the gsutil command.
|
| - This can be done in a bash script, for example, by doing:
|
| -
|
| - gsutil cp gs://your-bucket/your-object ./local-file
|
| - if [ "$status" -ne "0" ] ; then
|
| - << Code that handles failures >>
|
| - fi
|
| -
|
| - Resumable uploads and downloads store some state information in a file
|
| - in ~/.gsutil named by the destination object or file. If you attempt to
|
| - resume a transfer from a machine with a different directory, the transfer
|
| - will start over from scratch.
|
| -
|
| - See also "gsutil help prod" for details on using resumable transfers
|
| - in production.
|
| -"""
|
| -
|
| -_STREAMING_TRANSFERS_TEXT = """
|
| -<B>STREAMING TRANSFERS</B>
|
| - Use '-' in place of src_url or dst_url to perform a streaming
|
| - transfer. For example:
|
| -
|
| - long_running_computation | gsutil cp - gs://my_bucket/obj
|
| -
|
| - Streaming uploads using the JSON API (see "gsutil help apis") are buffered in
|
| - memory and can retry in the event of network flakiness or service errors.
|
| -
|
| - Streaming transfers (other than uploads using the JSON API) do not support
|
| - resumable uploads/downloads. If you have a large amount of data to upload
|
| - (say, more than 100 MiB) it is recommended to write the data to a local file
|
| - and then copy that file to the cloud rather than streaming it (and similarly
|
| - for large downloads).
|
| -
|
| - WARNING: When performing streaming transfers gsutil does not compute a
|
| - checksum of the uploaded or downloaded data. Therefore, we recommend that
|
| - users either perform their own validation of the data or use non-streaming
|
| - transfers (which perform integrity checking automatically).
|
| -"""
|
| -
|
| -_PARALLEL_COMPOSITE_UPLOADS_TEXT = """
|
| -<B>PARALLEL COMPOSITE UPLOADS</B>
|
| - gsutil can automatically use
|
| - `object composition <https://developers.google.com/storage/docs/composite-objects>`_
|
| - to perform uploads in parallel for large, local files being uploaded to Google
|
| - Cloud Storage. This means that, if enabled (see next paragraph), a large file
|
| - will be split into component pieces that will be uploaded in parallel. Those
|
| - components will then be composed in the cloud, and the temporary components in
|
| - the cloud will be deleted after successful composition. No additional local
|
| - disk space is required for this operation.
|
| -
|
| - Using parallel composite uploads presents a tradeoff between upload
|
| - performance and download configuration: If you enable parallel composite
|
| - uploads your uploads will run faster, but someone will need to install a
|
| - compiled crcmod (see "gsutil help crcmod") on every machine where objects are
|
| - downloaded by gsutil or other Python applications. For some distributions this
|
| - is easy (e.g., it comes pre-installed on MacOS), but in some cases users have
|
| - found it difficult. Because of this at present parallel composite uploads are
|
| - disabled by default. Google is actively working with a number of the Linux
|
| - distributions to get crcmod included with the stock distribution. Once that is
|
| - done we will re-enable parallel composite uploads by default in gsutil.
|
| -
|
| - To try parallel composite uploads you can run the command:
|
| -
|
| - gsutil -o GSUtil:parallel_composite_upload_threshold=150M cp bigfile gs://your-bucket
|
| -
|
| - where bigfile is larger than 150 MiB. When you do this notice that the upload
|
| - progress indicator continuously updates for several different uploads at once
|
| - (corresponding to each of the sections of the file being uploaded in
|
| - parallel), until the parallel upload completes. If you then want to enable
|
| - parallel composite uploads for all of your future uploads (notwithstanding the
|
| - caveats mentioned earlier), you can uncomment and set the
|
| - "parallel_composite_upload_threshold" config value in your .boto configuration
|
| - file to this value.
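| -
| -  As a sketch, the .boto entry being referred to lives under the [GSUtil]
| -  section and, once uncommented and set, looks like:
| -
| -      [GSUtil]
| -      parallel_composite_upload_threshold = 150M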
|
| -
|
| - Note that the crcmod problem only impacts downloads via Python applications
|
| - (such as gsutil). If any users who need to download the data using gsutil or
|
| - other Python applications can install crcmod, it makes sense to enable
|
| - parallel composite uploads (see above). For example, if you use gsutil to
|
| - upload video assets and those assets will only ever be served via a Java
|
| - application (there are efficient crc32c implementations available in Java), it
|
| - would make sense to enable parallel composite uploads on your machine.
|
| -
|
| - If a parallel composite upload fails prior to composition, re-running the
|
| - gsutil command will take advantage of resumable uploads for those components
|
| - that failed, and the component objects will be deleted after the first
|
| - successful attempt. Any temporary objects that were uploaded successfully
|
| - before gsutil failed will still exist until the upload is completed
|
| - successfully. The temporary objects will be named in the following fashion:
|
| -
|
| - <random ID>%s<hash>
|
| -
|
| - where <random ID> is some numerical value, and <hash> is an MD5 hash (not
|
| - related to the hash of the contents of the file or object).
|
| -
|
| - To avoid leaving temporary objects around, you should make sure to check the
|
| - exit status from the gsutil command. This can be done in a bash script, for
|
| - example, by doing:
|
| -
|
| - gsutil cp ./local-file gs://your-bucket/your-object
|
| - if [ "$status" -ne "0" ] ; then
|
| - << Code that handles failures >>
|
| - fi
|
| -
|
| - Or, for copying a directory, use this instead:
|
| -
|
| - gsutil cp -c -L cp.log -r ./dir gs://bucket
|
| - if [ "$status" -ne "0" ] ; then
|
| - << Code that handles failures >>
|
| - fi
|
| -
|
| - One important caveat is that files uploaded in this fashion are still subject
|
| - to the maximum number of components limit. For example, if you upload a large
|
| - file that gets split into %d components, and try to compose it with another
|
| - object with %d components, the operation will fail because it exceeds the %d
|
| - component limit. If you wish to compose an object later and the component
|
| - limit is a concern, it is recommended that you disable parallel composite
|
| - uploads for that transfer.
|
| -
|
| - Also note that an object uploaded using this feature will have a CRC32C hash,
|
| - but it will not have an MD5 hash (and because of that, requires users who
|
| - download the object to have crcmod installed, as noted earlier). For details
|
| - see 'gsutil help crc32c'.
|
| -
|
| - Note that this feature can be completely disabled by setting the
|
| - "parallel_composite_upload_threshold" variable in the .boto config file to 0.
|
| -""" % (PARALLEL_UPLOAD_TEMP_NAMESPACE, 10, MAX_COMPONENT_COUNT - 9,
|
| - MAX_COMPONENT_COUNT)
|
| -
|
| -
|
| -_CHANGING_TEMP_DIRECTORIES_TEXT = """
|
| -<B>CHANGING TEMP DIRECTORIES</B>
|
| - gsutil writes data to a temporary directory in several cases:
|
| -
|
| - - when compressing data to be uploaded (see the -z option)
|
| - - when decompressing data being downloaded (when the data has
|
| - Content-Encoding:gzip, e.g., as happens when uploaded using gsutil cp -z)
|
| - - when running integration tests (using the gsutil test command)
|
| -
|
| - In these cases it's possible the temp file location on your system that
|
| - gsutil selects by default may not have enough space. If you find that
|
| - gsutil runs out of space during one of these operations (e.g., raising
|
| - "CommandException: Inadequate temp space available to compress <your file>"
|
| - during a gsutil cp -z operation), you can change where it writes these
|
| - temp files by setting the TMPDIR environment variable. On Linux and MacOS
|
| - you can do this either by running gsutil this way:
|
| -
|
| - TMPDIR=/some/directory gsutil cp ...
|
| -
|
| - or by adding this line to your ~/.bashrc file and then restarting the shell
|
| - before running gsutil:
|
| -
|
| - export TMPDIR=/some/directory
|
| -
|
| - On Windows 7 you can change the TMPDIR environment variable from Start ->
|
| - Computer -> System -> Advanced System Settings -> Environment Variables.
|
| - You need to reboot after making this change for it to take effect. (Rebooting
|
| - is not necessary after running the export command on Linux and MacOS.)
|
| -"""
|
| -
|
| -_OPTIONS_TEXT = """
|
| -<B>OPTIONS</B>
|
| - -a canned_acl Sets the named canned_acl when objects are uploaded. See
|
| - 'gsutil help acls' for further details.
|
| -
|
| - -c If an error occurs, continue to attempt to copy the remaining
|
| - files. If any copies were unsuccessful, gsutil's exit status
|
| - will be non-zero even if this flag is set. This option is
|
| - implicitly set when running "gsutil -m cp...". Note: -c only
|
| - applies to the actual copying operation. If an error occurs
|
| - while iterating over the files in the local directory (e.g.,
|
| - invalid Unicode file name) gsutil will print an error message
|
| - and abort.
|
| -
|
| - -D Copy in "daisy chain" mode, i.e., copying between two buckets
|
| - by hooking a download to an upload, via the machine where
|
| - gsutil is run. By default, data are copied between two buckets
|
| - "in the cloud", i.e., without needing to copy via the machine
|
| - where gsutil runs.
|
| -
|
| - By default, a "copy in the cloud" when the source is a
|
| - composite object will retain the composite nature of the
|
| - object. However, daisy chain mode can be used to change a
|
| - composite object into a non-composite object. For example:
|
| -
|
| - gsutil cp -D -p gs://bucket/obj gs://bucket/obj_tmp
|
| - gsutil mv -p gs://bucket/obj_tmp gs://bucket/obj
|
| -
|
| - Note: Daisy chain mode is automatically used when copying
|
| - between providers (e.g., to copy data from Google Cloud Storage
|
| - to another provider).
|
| -
|
| - -e Exclude symlinks. When specified, symbolic links will not be
|
| - copied.
|
| -
|
| - -I Causes gsutil to read the list of files or objects to copy from
|
| - stdin. This allows you to run a program that generates the list
|
| - of files to upload/download.
|
| -
|
| - -L <file> Outputs a manifest log file with detailed information about
|
| - each item that was copied. This manifest contains the following
|
| - information for each item:
|
| -
|
| - - Source path.
|
| - - Destination path.
|
| - - Source size.
|
| - - Bytes transferred.
|
| - - MD5 hash.
|
| - - UTC date and time transfer was started in ISO 8601 format.
|
| - - UTC date and time transfer was completed in ISO 8601 format.
|
| - - Upload id, if a resumable upload was performed.
|
| - - Final result of the attempted transfer, success or failure.
|
| - - Failure details, if any.
|
| -
|
| - If the log file already exists, gsutil will use the file as an
|
| - input to the copy process, and will also append log items to
|
| - the existing file. Files/objects that are marked in the
|
| - existing log file as having been successfully copied (or
|
| - skipped) will be ignored. Files/objects without entries will be
|
| - copied and ones previously marked as unsuccessful will be
|
| - retried. This can be used in conjunction with the -c option to
|
| - build a script that copies a large number of objects reliably,
|
| - using a bash script like the following:
|
| -
|
| - until gsutil cp -c -L cp.log -r ./dir gs://bucket; do
|
| - sleep 1
|
| - done
|
| -
|
| - The -c option will cause copying to continue after failures
|
| - occur, and the -L option will allow gsutil to pick up where it
|
| - left off without duplicating work. The loop will continue
|
| - running as long as gsutil exits with a non-zero status (such a
|
| - status indicates there was at least one failure during the
|
| - gsutil run).
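| - A short sketch for inspecting such a manifest appears after this list of options.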
|
| -
|
| - Note: If you're trying to synchronize the contents of a
|
| - directory and a bucket (or two buckets), see
|
| - 'gsutil help rsync'.
|
| -
|
| - -n No-clobber. When specified, existing files or objects at the
|
| - destination will not be overwritten. Any items that are skipped
|
| - by this option will be reported as being skipped. This option
|
| - will perform an additional GET request to check if an item
|
| - exists before attempting to upload the data. This will save
|
| - retransmitting data, but the additional HTTP requests may make
|
| - small object transfers slower and more expensive.
|
| -
|
| - -p Causes ACLs to be preserved when copying in the cloud. Note
|
| - that this option has performance and cost implications when
|
| - using the XML API, as it requires separate HTTP calls for
|
| - interacting with ACLs. The performance issue can be mitigated
|
| - to some degree by using gsutil -m cp to cause parallel copying.
|
| - Also, this option only works if you have OWNER access to all of
|
| - the objects that are copied.
|
| -
|
| - You can avoid the additional performance and cost of using
|
| - cp -p if you want all objects in the destination bucket to end
|
| - up with the same ACL by setting a default object ACL on that
|
| - bucket instead of using cp -p. See "gsutil help defacl".
|
| -
|
| - Note that it's not valid to specify both the -a and -p options
|
| - together.
|
| -
|
| - -R, -r Causes directories, buckets, and bucket subdirectories to be
|
| - copied recursively. If you neglect to use this option for
|
| - an upload, gsutil will copy any files it finds and skip any
|
| - directories. Similarly, neglecting to specify -r for a download
|
| - will cause gsutil to copy any objects at the current bucket
|
| - directory level, and skip any subdirectories.
|
| -
|
| - -U Skip objects with unsupported object types instead of failing.
|
| - Unsupported object types are S3 Glacier objects.
|
| -
|
| - -v Requests that the version-specific URL for each uploaded object
|
| - be printed. Given this URL you can make future upload requests
|
| - that are safe in the face of concurrent updates, because Google
|
| - Cloud Storage will refuse to perform the update if the current
|
| - object version doesn't match the version-specific URL. See
|
| - 'gsutil help versions' for more details.
|
| -
|
| - -z <ext,...> Applies gzip content-encoding to file uploads with the given
|
| - extensions. This is useful when uploading files with
|
| - compressible content (such as .js, .css, or .html files)
|
| - because it saves network bandwidth and space in Google Cloud
|
| - Storage, which in turn reduces storage costs.
|
| -
|
| - When you specify the -z option, the data from your files is
|
| - compressed before it is uploaded, but your actual files are
|
| - left uncompressed on the local disk. The uploaded objects
|
| - retain the Content-Type and name of the original files but are
|
| - given a Content-Encoding header with the value "gzip" to
|
| - indicate that the object data are stored compressed on the
|
| - Google Cloud Storage servers.
|
| -
|
| - For example, the following command:
|
| -
|
| - gsutil cp -z html -a public-read cattypes.html gs://mycats
|
| -
|
| - will do all of the following:
|
| -
|
| - - Upload as the object gs://mycats/cattypes.html (cp command)
|
| - - Set the Content-Type to text/html (based on file extension)
|
| - - Compress the data in the file cattypes.html (-z option)
|
| - - Set the Content-Encoding to gzip (-z option)
|
| - - Set the ACL to public-read (-a option)
|
| - - If a user tries to view cattypes.html in a browser, the
|
| - browser will know to uncompress the data based on the
|
| - Content-Encoding header, and to render it as HTML based on
|
| - the Content-Type header.
|
| -
|
| - Note that if you download an object with Content-Encoding:gzip
|
| - gsutil will decompress the content before writing the local
|
| - file.
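| -
| -  The manifest produced by the -L option is a CSV file. As a minimal
| -  sketch of how it might be inspected from Python (the column names
| -  'Source' and 'Result' are assumed here; check the header row of your
| -  own manifest), you could collect the entries that did not complete:
| -
| -      import csv
| -
| -      def failed_sources(manifest_path):
| -        # Return source URLs whose result was neither 'OK' nor 'skip'
| -        # (i.e., errors), so they can be inspected or retried.
| -        failures = []
| -        with open(manifest_path) as fp:
| -          for row in csv.DictReader(fp):
| -            if row.get('Result') not in ('OK', 'skip'):
| -              failures.append(row.get('Source'))
| -        return failures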
|
| -"""
|
| -
|
| -_DETAILED_HELP_TEXT = '\n\n'.join([_SYNOPSIS_TEXT,
|
| - _DESCRIPTION_TEXT,
|
| - _NAME_CONSTRUCTION_TEXT,
|
| - _SUBDIRECTORIES_TEXT,
|
| - _COPY_IN_CLOUD_TEXT,
|
| - _CHECKSUM_VALIDATION_TEXT,
|
| - _RETRY_HANDLING_TEXT,
|
| - _RESUMABLE_TRANSFERS_TEXT,
|
| - _STREAMING_TRANSFERS_TEXT,
|
| - _PARALLEL_COMPOSITE_UPLOADS_TEXT,
|
| - _CHANGING_TEMP_DIRECTORIES_TEXT,
|
| - _OPTIONS_TEXT])
|
| -
|
| -
|
| -CP_SUB_ARGS = 'a:cDeIL:MNnprRtUvz:'
|
| -
|
| -
|
| -def _CopyFuncWrapper(cls, args, thread_state=None):
|
| - cls.CopyFunc(args, thread_state=thread_state)
|
| -
|
| -
|
| -def _CopyExceptionHandler(cls, e):
|
| - """Simple exception handler to allow post-completion status."""
|
| - cls.logger.error(str(e))
|
| - cls.op_failure_count += 1
|
| - cls.logger.debug('\n\nEncountered exception while copying:\n%s\n',
|
| - traceback.format_exc())
|
| -
|
| -
|
| -def _RmExceptionHandler(cls, e):
|
| - """Simple exception handler to allow post-completion status."""
|
| - cls.logger.error(str(e))
|
| -
|
| -
|
| -class CpCommand(Command):
|
| - """Implementation of gsutil cp command.
|
| -
|
| - Note that CpCommand is run for both gsutil cp and gsutil mv. The latter
|
| - happens by MvCommand calling CpCommand and passing the hidden (undocumented)
|
| - -M option. This allows the copy and remove needed for each mv to run
|
| - together (rather than first running all the cp's and then all the rm's, as
|
| - we originally had implemented), which in turn avoids the following problem
|
| - with removing the wrong objects: starting with a bucket containing only
|
| - the object gs://bucket/obj, say the user does:
|
| - gsutil mv gs://bucket/* gs://bucket/d.txt
|
| - If we ran all the cp's and then all the rm's and we didn't expand the wildcard
|
| - first, the cp command would first copy gs://bucket/obj to gs://bucket/d.txt,
|
| - and the rm command would then remove that object. In the implementation
|
| - prior to gsutil release 3.12 we avoided this by building a list of objects
|
| - to process and then running the copies and then the removes; but building
|
| - the list up front limits scalability (compared with the current approach
|
| - of processing the bucket listing iterator on the fly).
|
| - """
|
| -
|
| - # Command specification. See base class for documentation.
|
| - command_spec = Command.CreateCommandSpec(
|
| - 'cp',
|
| - command_name_aliases=['copy'],
|
| - usage_synopsis=_SYNOPSIS,
|
| - min_args=1,
|
| - max_args=NO_MAX,
|
| - # -t is deprecated but leave intact for now to avoid breakage.
|
| - supported_sub_args=CP_SUB_ARGS,
|
| - file_url_ok=True,
|
| - provider_url_ok=False,
|
| - urls_start_arg=0,
|
| - gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
|
| - gs_default_api=ApiSelector.JSON,
|
| - supported_private_args=['testcallbackfile='],
|
| - argparse_arguments=[
|
| - CommandArgument.MakeZeroOrMoreCloudOrFileURLsArgument()
|
| - ]
|
| - )
|
| - # Help specification. See help_provider.py for documentation.
|
| - help_spec = Command.HelpSpec(
|
| - help_name='cp',
|
| - help_name_aliases=['copy'],
|
| - help_type='command_help',
|
| - help_one_line_summary='Copy files and objects',
|
| - help_text=_DETAILED_HELP_TEXT,
|
| - subcommand_help_text={},
|
| - )
|
| -
|
| - # pylint: disable=too-many-statements
|
| - def CopyFunc(self, name_expansion_result, thread_state=None):
|
| - """Worker function for performing the actual copy (and rm, for mv)."""
|
| - gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
|
| -
|
| - copy_helper_opts = copy_helper.GetCopyHelperOpts()
|
| - if copy_helper_opts.perform_mv:
|
| - cmd_name = 'mv'
|
| - else:
|
| - cmd_name = self.command_name
|
| - src_url = name_expansion_result.source_storage_url
|
| - exp_src_url = name_expansion_result.expanded_storage_url
|
| - src_url_names_container = name_expansion_result.names_container
|
| - have_multiple_srcs = name_expansion_result.is_multi_source_request
|
| -
|
| - if src_url.IsCloudUrl() and src_url.IsProvider():
|
| - raise CommandException(
|
| - 'The %s command does not allow provider-only source URLs (%s)' %
|
| - (cmd_name, src_url))
|
| - if have_multiple_srcs:
|
| - copy_helper.InsistDstUrlNamesContainer(
|
| - self.exp_dst_url, self.have_existing_dst_container, cmd_name)
|
| -
|
| - # Various GUI tools (like the GCS web console) create placeholder objects
|
| - # ending with '/' when the user creates an empty directory. Normally these
|
| - # tools should delete those placeholders once objects have been written
|
| - # "under" the directory, but sometimes the placeholders are left around. We
|
| - # need to filter them out here, otherwise if the user tries to rsync from
|
| - # GCS to a local directory it will result in a directory/file conflict
|
| - # (e.g., trying to download an object called "mydata/" where the local
|
| - # directory "mydata" exists).
|
| - if IsCloudSubdirPlaceholder(exp_src_url):
|
| - self.logger.info('Skipping cloud sub-directory placeholder object (%s) '
|
| - 'because such objects aren\'t needed in (and would '
|
| - 'interfere with) directories in the local file system',
|
| - exp_src_url)
|
| - return
|
| -
|
| - if copy_helper_opts.use_manifest and self.manifest.WasSuccessful(
|
| - exp_src_url.url_string):
|
| - return
|
| -
|
| - if copy_helper_opts.perform_mv:
|
| - if name_expansion_result.names_container:
|
| - # Use recursion_requested when performing name expansion for the
|
| - # directory mv case so we can determine if any of the source URLs are
|
| - # directories (and then use cp -r and rm -r to perform the move, to
|
| - # match the behavior of Linux mv (which when moving a directory moves
|
| - # all the contained files)).
|
| - self.recursion_requested = True
|
| - # Disallow wildcard src URLs when moving directories, as supporting it
|
| - # would make the name transformation too complex and would also be
|
| - # dangerous (e.g., someone could accidentally move many objects to the
|
| - # wrong name, or accidentally overwrite many objects).
|
| - if ContainsWildcard(src_url.url_string):
|
| - raise CommandException('The mv command disallows naming source '
|
| - 'directories using wildcards')
|
| -
|
| - if (self.exp_dst_url.IsFileUrl()
|
| - and not os.path.exists(self.exp_dst_url.object_name)
|
| - and have_multiple_srcs):
|
| - os.makedirs(self.exp_dst_url.object_name)
|
| -
|
| - dst_url = copy_helper.ConstructDstUrl(
|
| - src_url, exp_src_url, src_url_names_container, have_multiple_srcs,
|
| - self.exp_dst_url, self.have_existing_dst_container,
|
| - self.recursion_requested)
|
| - dst_url = copy_helper.FixWindowsNaming(src_url, dst_url)
|
| -
|
| - copy_helper.CheckForDirFileConflict(exp_src_url, dst_url)
|
| - if copy_helper.SrcDstSame(exp_src_url, dst_url):
|
| - raise CommandException('%s: "%s" and "%s" are the same file - '
|
| - 'abort.' % (cmd_name, exp_src_url, dst_url))
|
| -
|
| - if dst_url.IsCloudUrl() and dst_url.HasGeneration():
|
| - raise CommandException('%s: a version-specific URL\n(%s)\ncannot be '
|
| - 'the destination for gsutil cp - abort.'
|
| - % (cmd_name, dst_url))
|
| -
|
| - elapsed_time = bytes_transferred = 0
|
| - try:
|
| - if copy_helper_opts.use_manifest:
|
| - self.manifest.Initialize(
|
| - exp_src_url.url_string, dst_url.url_string)
|
| - (elapsed_time, bytes_transferred, result_url, md5) = (
|
| - copy_helper.PerformCopy(
|
| - self.logger, exp_src_url, dst_url, gsutil_api,
|
| - self, _CopyExceptionHandler, allow_splitting=True,
|
| - headers=self.headers, manifest=self.manifest,
|
| - gzip_exts=self.gzip_exts, test_method=self.test_method))
|
| - if copy_helper_opts.use_manifest:
|
| - if md5:
|
| - self.manifest.Set(exp_src_url.url_string, 'md5', md5)
|
| - self.manifest.SetResult(
|
| - exp_src_url.url_string, bytes_transferred, 'OK')
|
| - if copy_helper_opts.print_ver:
|
| - # Some cases don't return a version-specific URL (e.g., if destination
|
| - # is a file).
|
| - self.logger.info('Created: %s', result_url)
|
| - except ItemExistsError:
|
| - message = 'Skipping existing item: %s' % dst_url
|
| - self.logger.info(message)
|
| - if copy_helper_opts.use_manifest:
|
| - self.manifest.SetResult(exp_src_url.url_string, 0, 'skip', message)
|
| - except SkipUnsupportedObjectError, e:
|
| - message = ('Skipping item %s with unsupported object type %s' %
|
| - (exp_src_url.url_string, e.unsupported_type))
|
| - self.logger.info(message)
|
| - if copy_helper_opts.use_manifest:
|
| - self.manifest.SetResult(exp_src_url.url_string, 0, 'skip', message)
|
| - except copy_helper.FileConcurrencySkipError, e:
|
| - self.logger.warn('Skipping copy of source URL %s because destination URL '
|
| - '%s is already being copied by another gsutil process '
|
| - 'or thread (did you specify the same source URL twice?) '
|
| - % (src_url, dst_url))
|
| - except Exception, e:
|
| - if (copy_helper_opts.no_clobber and
|
| - copy_helper.IsNoClobberServerException(e)):
|
| - message = 'Rejected (noclobber): %s' % dst_url
|
| - self.logger.info(message)
|
| - if copy_helper_opts.use_manifest:
|
| - self.manifest.SetResult(
|
| - exp_src_url.url_string, 0, 'skip', message)
|
| - elif self.continue_on_error:
|
| - message = 'Error copying %s: %s' % (src_url, str(e))
|
| - self.op_failure_count += 1
|
| - self.logger.error(message)
|
| - if copy_helper_opts.use_manifest:
|
| - self.manifest.SetResult(
|
| - exp_src_url.url_string, 0, 'error',
|
| - RemoveCRLFFromString(message))
|
| - else:
|
| - if copy_helper_opts.use_manifest:
|
| - self.manifest.SetResult(
|
| - exp_src_url.url_string, 0, 'error', str(e))
|
| - raise
|
| - else:
|
| - if copy_helper_opts.perform_mv:
|
| - self.logger.info('Removing %s...', exp_src_url)
|
| - if exp_src_url.IsCloudUrl():
|
| - gsutil_api.DeleteObject(exp_src_url.bucket_name,
|
| - exp_src_url.object_name,
|
| - generation=exp_src_url.generation,
|
| - provider=exp_src_url.scheme)
|
| - else:
|
| - os.unlink(exp_src_url.object_name)
|
| -
|
| - with self.stats_lock:
|
| - self.total_elapsed_time += elapsed_time
|
| - self.total_bytes_transferred += bytes_transferred
|
| -
|
| - # Command entry point.
|
| - def RunCommand(self):
|
| - copy_helper_opts = self._ParseOpts()
|
| -
|
| - self.total_elapsed_time = self.total_bytes_transferred = 0
|
| - if self.args[-1] == '-' or self.args[-1] == 'file://-':
|
| - return CatHelper(self).CatUrlStrings(self.args[:-1])
|
| -
|
| - if copy_helper_opts.read_args_from_stdin:
|
| - if len(self.args) != 1:
|
| - raise CommandException('Source URLs cannot be specified with -I option')
|
| - url_strs = StdinIterator()
|
| - else:
|
| - if len(self.args) < 2:
|
| - raise CommandException('Wrong number of arguments for "cp" command.')
|
| - url_strs = self.args[:-1]
|
| -
|
| - (self.exp_dst_url, self.have_existing_dst_container) = (
|
| - copy_helper.ExpandUrlToSingleBlr(self.args[-1], self.gsutil_api,
|
| - self.debug, self.project_id))
|
| -
|
| - # If the destination bucket has versioning enabled iterate with
|
| - # all_versions=True. That way we'll copy all versions if the source bucket
|
| - # is versioned; and by leaving all_versions=False if the destination bucket
|
| - # has versioning disabled we will avoid copying old versions all to the same
|
| - # un-versioned destination object.
|
| - all_versions = False
|
| - try:
|
| - bucket = self._GetBucketWithVersioningConfig(self.exp_dst_url)
|
| - if bucket and bucket.versioning and bucket.versioning.enabled:
|
| - all_versions = True
|
| - except AccessDeniedException:
|
| - # This happens (in the XML API only) if the user doesn't have OWNER access
|
| - # on the bucket (needed to check if versioning is enabled). In this case
|
| - # fall back to copying all versions (which can be inefficient for the
|
| - # reason noted in the comment above). We don't try to warn the user
|
| - # because that would result in false positive warnings (since we can't
|
| - # check if versioning is enabled on the destination bucket).
|
| - #
|
| - # For JSON, we will silently not return versioning if we don't have
|
| - # access.
|
| - all_versions = True
|
| -
|
| - name_expansion_iterator = NameExpansionIterator(
|
| - self.command_name, self.debug,
|
| - self.logger, self.gsutil_api, url_strs,
|
| - self.recursion_requested or copy_helper_opts.perform_mv,
|
| - project_id=self.project_id, all_versions=all_versions,
|
| - continue_on_error=self.continue_on_error or self.parallel_operations)
|
| -
|
| - # Use a lock to ensure accurate statistics in the face of
|
| - # multi-threading/multi-processing.
|
| - self.stats_lock = CreateLock()
|
| -
|
| - # Tracks if any copies failed.
|
| - self.op_failure_count = 0
|
| -
|
| - # Start the clock.
|
| - start_time = time.time()
|
| -
|
| - # Tuple of attributes to share/manage across multiple processes in
|
| - # parallel (-m) mode.
|
| - shared_attrs = ('op_failure_count', 'total_bytes_transferred')
|
| -
|
| - # Perform copy requests in parallel (-m) mode, if requested, using
|
| - # configured number of parallel processes and threads. Otherwise,
|
| - # perform requests with sequential function calls in current process.
|
| - self.Apply(_CopyFuncWrapper, name_expansion_iterator,
|
| - _CopyExceptionHandler, shared_attrs,
|
| - fail_on_error=(not self.continue_on_error))
|
| - self.logger.debug(
|
| - 'total_bytes_transferred: %d', self.total_bytes_transferred)
|
| -
|
| - end_time = time.time()
|
| - self.total_elapsed_time = end_time - start_time
|
| -
|
| - # Sometimes, particularly when running unit tests, the total elapsed time
|
| - # is really small. On Windows, the timer resolution is too small and
|
| - # causes total_elapsed_time to be zero.
|
| - try:
|
| - float(self.total_bytes_transferred) / float(self.total_elapsed_time)
|
| - except ZeroDivisionError:
|
| - self.total_elapsed_time = 0.01
|
| -
|
| - self.total_bytes_per_second = (float(self.total_bytes_transferred) /
|
| - float(self.total_elapsed_time))
|
| -
|
| - if self.debug == 3:
|
| - # Note that this only counts the actual GET and PUT bytes for the copy
|
| - # - not any transfers for doing wildcard expansion, the initial
|
| - # HEAD/GET request performed to get the object metadata, etc.
|
| - if self.total_bytes_transferred != 0:
|
| - self.logger.info(
|
| - 'Total bytes copied=%d, total elapsed time=%5.3f secs (%sps)',
|
| - self.total_bytes_transferred, self.total_elapsed_time,
|
| - MakeHumanReadable(self.total_bytes_per_second))
|
| - if self.op_failure_count:
|
| - plural_str = 's' if self.op_failure_count > 1 else ''
|
| - raise CommandException('%d file%s/object%s could not be transferred.' % (
|
| - self.op_failure_count, plural_str, plural_str))
|
| -
|
| - return 0
|
| -
|
| - def _ParseOpts(self):
|
| - perform_mv = False
|
| - # exclude_symlinks is handled by Command parent class, so save in Command
|
| - # state rather than CopyHelperOpts.
|
| - self.exclude_symlinks = False
|
| - no_clobber = False
|
| - # continue_on_error is handled by Command parent class, so save in Command
|
| - # state rather than CopyHelperOpts.
|
| - self.continue_on_error = False
|
| - daisy_chain = False
|
| - read_args_from_stdin = False
|
| - print_ver = False
|
| - use_manifest = False
|
| - preserve_acl = False
|
| - canned_acl = None
|
| - # canned_acl is handled by a helper function in parent
|
| - # Command class, so save in Command state rather than CopyHelperOpts.
|
| - self.canned = None
|
| -
|
| - self.skip_unsupported_objects = False
|
| -
|
| - # Files matching these extensions should be gzipped before uploading.
|
| - self.gzip_exts = []
|
| -
|
| - test_callback_file = None
|
| -
|
| - # self.recursion_requested initialized in command.py (so can be checked
|
| - # in parent class for all commands).
|
| - self.manifest = None
|
| - if self.sub_opts:
|
| - for o, a in self.sub_opts:
|
| - if o == '-a':
|
| - canned_acl = a
|
| - self.canned = True
|
| - if o == '-c':
|
| - self.continue_on_error = True
|
| - elif o == '-D':
|
| - daisy_chain = True
|
| - elif o == '-e':
|
| - self.exclude_symlinks = True
|
| - elif o == '--testcallbackfile':
|
| - # File path of a pickled class that implements ProgressCallback.call.
|
| - # Used for testing transfer interruptions and resumes.
|
| - test_callback_file = a
|
| - elif o == '-I':
|
| - read_args_from_stdin = True
|
| - elif o == '-L':
|
| - use_manifest = True
|
| - self.manifest = Manifest(a)
|
| - elif o == '-M':
|
| - # Note that we signal to the cp command to perform a move (copy
|
| - # followed by remove) and use directory-move naming rules by passing
|
| - # the undocumented (for internal use) -M option when running the cp
|
| - # command from mv.py.
|
| - perform_mv = True
|
| - elif o == '-n':
|
| - no_clobber = True
|
| - elif o == '-p':
|
| - preserve_acl = True
|
| - elif o == '-r' or o == '-R':
|
| - self.recursion_requested = True
|
| - elif o == '-U':
|
| - self.skip_unsupported_objects = True
|
| - elif o == '-v':
|
| - print_ver = True
|
| - elif o == '-z':
|
| - self.gzip_exts = [x.strip() for x in a.split(',')]
|
| - if preserve_acl and canned_acl:
|
| - raise CommandException(
|
| - 'Specifying both the -p and -a options together is invalid.')
|
| - return CreateCopyHelperOpts(
|
| - perform_mv=perform_mv,
|
| - no_clobber=no_clobber,
|
| - daisy_chain=daisy_chain,
|
| - read_args_from_stdin=read_args_from_stdin,
|
| - print_ver=print_ver,
|
| - use_manifest=use_manifest,
|
| - preserve_acl=preserve_acl,
|
| - canned_acl=canned_acl,
|
| - skip_unsupported_objects=self.skip_unsupported_objects,
|
| - test_callback_file=test_callback_file)
|
| -
|
| - def _GetBucketWithVersioningConfig(self, exp_dst_url):
|
| - """Gets versioning config for a bucket and ensures that it exists.
|
| -
|
| - Args:
|
| - exp_dst_url: Wildcard-expanded destination StorageUrl.
|
| -
|
| - Raises:
|
| - AccessDeniedException: if there was a permissions problem accessing the
|
| - bucket or its versioning config.
|
| - CommandException: if URL refers to a cloud bucket that does not exist.
|
| -
|
| - Returns:
|
| - apitools Bucket with versioning configuration.
|
| - """
|
| - bucket = None
|
| - if exp_dst_url.IsCloudUrl() and exp_dst_url.IsBucket():
|
| - try:
|
| - bucket = self.gsutil_api.GetBucket(
|
| - exp_dst_url.bucket_name, provider=exp_dst_url.scheme,
|
| - fields=['versioning'])
|
| - except AccessDeniedException, e:
|
| - raise
|
| - except NotFoundException, e:
|
| - raise CommandException('Destination bucket %s does not exist.' %
|
| - exp_dst_url)
|
| - except Exception, e:
|
| - raise CommandException('Error retrieving destination bucket %s: %s' %
|
| - (exp_dst_url, e.message))
|
| - return bucket
|
|