| Index: third_party/gsutil/gslib/commands/cp.py
 | 
| diff --git a/third_party/gsutil/gslib/commands/cp.py b/third_party/gsutil/gslib/commands/cp.py
 | 
| new file mode 100644
 | 
| index 0000000000000000000000000000000000000000..215b505793661d4219fa7ef13db6767c72867798
 | 
| --- /dev/null
 | 
| +++ b/third_party/gsutil/gslib/commands/cp.py
 | 
| @@ -0,0 +1,1819 @@
 | 
| +# Copyright 2011 Google Inc. All Rights Reserved.
 | 
| +# Copyright 2011, Nexenta Systems Inc.
 | 
| +#
 | 
| +# Licensed under the Apache License, Version 2.0 (the "License");
 | 
| +# you may not use this file except in compliance with the License.
 | 
| +# You may obtain a copy of the License at
 | 
| +#
 | 
| +#     http://www.apache.org/licenses/LICENSE-2.0
 | 
| +#
 | 
| +# Unless required by applicable law or agreed to in writing, software
 | 
| +# distributed under the License is distributed on an "AS IS" BASIS,
 | 
| +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
| +# See the License for the specific language governing permissions and
 | 
| +# limitations under the License.
 | 
| +
 | 
| +import boto
 | 
| +import errno
 | 
| +import gzip
 | 
| +import hashlib
 | 
| +import mimetypes
 | 
| +import os
 | 
| +import platform
 | 
| +import re
 | 
| +import subprocess
 | 
| +import stat
 | 
| +import sys
 | 
| +import tempfile
 | 
| +import threading
 | 
| +import time
 | 
| +
 | 
| +from boto import config
 | 
| +from boto.exception import GSResponseError
 | 
| +from boto.exception import ResumableUploadException
 | 
| +from boto.gs.resumable_upload_handler import ResumableUploadHandler
 | 
| +from boto.s3.keyfile import KeyFile
 | 
| +from boto.s3.resumable_download_handler import ResumableDownloadHandler
 | 
| +from boto.storage_uri import BucketStorageUri
 | 
| +from gslib.command import COMMAND_NAME
 | 
| +from gslib.command import COMMAND_NAME_ALIASES
 | 
| +from gslib.command import CONFIG_REQUIRED
 | 
| +from gslib.command import Command
 | 
| +from gslib.command import FILE_URIS_OK
 | 
| +from gslib.command import MAX_ARGS
 | 
| +from gslib.command import MIN_ARGS
 | 
| +from gslib.command import PROVIDER_URIS_OK
 | 
| +from gslib.command import SUPPORTED_SUB_ARGS
 | 
| +from gslib.command import URIS_START_ARG
 | 
| +from gslib.exception import CommandException
 | 
| +from gslib.help_provider import HELP_NAME
 | 
| +from gslib.help_provider import HELP_NAME_ALIASES
 | 
| +from gslib.help_provider import HELP_ONE_LINE_SUMMARY
 | 
| +from gslib.help_provider import HELP_TEXT
 | 
| +from gslib.help_provider import HELP_TYPE
 | 
| +from gslib.help_provider import HelpType
 | 
| +from gslib.name_expansion import NameExpansionIterator
 | 
| +from gslib.util import ExtractErrorDetail
 | 
| +from gslib.util import IS_WINDOWS
 | 
| +from gslib.util import MakeHumanReadable
 | 
| +from gslib.util import NO_MAX
 | 
| +from gslib.util import TWO_MB
 | 
| +from gslib.wildcard_iterator import ContainsWildcard
 | 
| +
 | 
| +_detailed_help_text = ("""
 | 
| +<B>SYNOPSIS</B>
 | 
| +  gsutil cp [OPTION]... src_uri dst_uri
 | 
| +    - or -
 | 
| +  gsutil cp [OPTION]... src_uri... dst_uri
 | 
| +    - or -
 | 
| +  gsutil cp [OPTION]... -I dst_uri
 | 
| +
 | 
| +
 | 
| +<B>DESCRIPTION</B>
 | 
| +  The gsutil cp command allows you to copy data between your local file
 | 
| +  system and the cloud, copy data within the cloud, and copy data between
 | 
| +  cloud storage providers. For example, to copy all text files from the
 | 
| +  local directory to a bucket you could do:
 | 
| +
 | 
| +    gsutil cp *.txt gs://my_bucket
 | 
| +
 | 
| +  Similarly, you can download text files from a bucket by doing:
 | 
| +
 | 
| +    gsutil cp gs://my_bucket/*.txt .
 | 
| +
 | 
| +  If you want to copy an entire directory tree you need to use the -R option:
 | 
| +
 | 
| +    gsutil cp -R dir gs://my_bucket
 | 
| +
 | 
| +  If you have a large number of files to upload you might want to use the
 | 
| +  gsutil -m option, to perform a parallel (multi-threaded/multi-processing)
 | 
| +  copy:
 | 
| +
 | 
| +    gsutil -m cp -R dir gs://my_bucket
 | 
| +
 | 
| +  You can pass a list of URIs to copy on STDIN instead of as command line
 | 
| +  arguments by using the -I option. This allows you to use gsutil in a
 | 
| +  pipeline to copy files and objects as generated by a program, such as:
 | 
| +
 | 
| +    some_program | gsutil -m cp -I gs://my_bucket
 | 
| +
 | 
| +  The contents of STDIN can name files, cloud URIs, and wildcards of files
 | 
| +  and cloud URIs.
 | 
| +
 | 
| +
 | 
| +<B>HOW NAMES ARE CONSTRUCTED</B>
 | 
| +  The gsutil cp command strives to name objects in a way consistent with how
 | 
| +  Linux cp works, which causes names to be constructed in varying ways depending
 | 
| +  on whether you're performing a recursive directory copy or copying
 | 
| +  individually named objects; and whether you're copying to an existing or
 | 
| +  non-existent directory.
 | 
| +
 | 
| +  When performing recursive directory copies, object names are constructed
 | 
| +  that mirror the source directory structure starting at the point of
 | 
| +  recursive processing. For example, the command:
 | 
| +
 | 
| +    gsutil cp -R dir1/dir2 gs://my_bucket
 | 
| +
 | 
| +  will create objects named like gs://my_bucket/dir2/a/b/c, assuming
 | 
| +  dir1/dir2 contains the file a/b/c.
 | 
| +
 | 
| +  In contrast, copying individually named files will result in objects named
 | 
| +  by the final path component of the source files. For example, the command:
 | 
| +
 | 
| +    gsutil cp dir1/dir2/** gs://my_bucket
 | 
| +
 | 
| +  will create objects named like gs://my_bucket/c.
 | 
| +
 | 
| +  The same rules apply for downloads: recursive copies of buckets and
 | 
| +  bucket subdirectories produce a mirrored filename structure, while copying
 | 
| +  individually (or wildcard) named objects produces flatly named files.
 | 
| +
 | 
| +  Note that in the above example the '**' wildcard matches all names
| 
| +  anywhere under dir1/dir2. The wildcard '*' will match names just one
| 
| +  level deep. For
 | 
| +  more details see 'gsutil help wildcards'.
 | 
| +
 | 
| +  There's an additional wrinkle when working with subdirectories: the resulting
 | 
| +  names depend on whether the destination subdirectory exists. For example,
 | 
| +  if gs://my_bucket/subdir exists as a subdirectory, the command:
 | 
| +
 | 
| +    gsutil cp -R dir1/dir2 gs://my_bucket/subdir
 | 
| +
 | 
| +  will create objects named like gs://my_bucket/subdir/dir2/a/b/c. In contrast,
 | 
| +  if gs://my_bucket/subdir does not exist, this same gsutil cp command will
 | 
| +  create objects named like gs://my_bucket/subdir/a/b/c.
 | 
| +
 | 
| +
 | 
| +<B>COPYING TO/FROM SUBDIRECTORIES; DISTRIBUTING TRANSFERS ACROSS MACHINES</B>
 | 
| +  You can use gsutil to copy to and from subdirectories by using a command like:
 | 
| +
 | 
| +    gsutil cp -R dir gs://my_bucket/data
 | 
| +
 | 
| +  This will cause dir and all of its files and nested subdirectories to be
 | 
| +  copied under the specified destination, resulting in objects with names like
 | 
| +  gs://my_bucket/data/dir/a/b/c. Similarly you can download from bucket
 | 
| +  subdirectories by using a command like:
 | 
| +
 | 
| +    gsutil cp -R gs://my_bucket/data dir
 | 
| +
 | 
| +  This will cause everything nested under gs://my_bucket/data to be downloaded
 | 
| +  into dir, resulting in files with names like dir/data/a/b/c.
 | 
| +
 | 
| +  Copying subdirectories is useful if you want to add data to an existing
 | 
| +  bucket directory structure over time. It's also useful if you want
 | 
| +  to parallelize uploads and downloads across multiple machines (often
 | 
| +  reducing overall transfer time compared with simply running gsutil -m
 | 
| +  cp on one machine). For example, if your bucket contains this structure:
 | 
| +
 | 
| +    gs://my_bucket/data/result_set_01/
 | 
| +    gs://my_bucket/data/result_set_02/
 | 
| +    ...
 | 
| +    gs://my_bucket/data/result_set_99/
 | 
| +
 | 
| +  you could perform concurrent downloads across 3 machines by running these
 | 
| +  commands on each machine, respectively:
 | 
| +
 | 
| +    gsutil -m cp -R gs://my_bucket/data/result_set_[0-3]* dir
 | 
| +    gsutil -m cp -R gs://my_bucket/data/result_set_[4-6]* dir
 | 
| +    gsutil -m cp -R gs://my_bucket/data/result_set_[7-9]* dir
 | 
| +
 | 
| +  Note that dir could be a local directory on each machine, or it could
 | 
| +  be a directory mounted off of a shared file server; whether the latter
 | 
| +  performs acceptably may depend on a number of things, so we recommend
 | 
| +  you experiment and find out what works best for you.
 | 
| +
 | 
| +
 | 
| +<B>COPYING IN THE CLOUD AND METADATA PRESERVATION</B>
 | 
| +  If both the source and destination URI are cloud URIs from the same
 | 
| +  provider, gsutil copies data "in the cloud" (i.e., without downloading
 | 
| +  to and uploading from the machine where you run gsutil). In addition to
 | 
| +  the performance and cost advantages of doing this, copying in the cloud
 | 
| +  preserves metadata (like Content-Type and Cache-Control).  In contrast,
 | 
| +  when you download data from the cloud it ends up in a file, which has
 | 
| +  no associated metadata. Thus, unless you have some way to hold on to
 | 
| +  or re-create that metadata, downloading to a file will not retain the
 | 
| +  metadata.
 | 
| +
 | 
| +  Note that by default, the gsutil cp command does not copy the object
 | 
| +  ACL to the new object, and instead will use the default bucket ACL (see
 | 
| +  "gsutil help setdefacl").  You can override this behavior with the -p
 | 
| +  option (see OPTIONS below).
 | 
| +
 | 
| +  gsutil does not preserve metadata when copying objects between providers.
 | 
| +
 | 
| +
 | 
| +<B>RESUMABLE TRANSFERS</B>
 | 
| +  gsutil automatically uses the Google Cloud Storage resumable upload
 | 
| +  feature whenever you use the cp command to upload an object that is larger
 | 
| +  than 2 MB. You do not need to specify any special command line options
 | 
| +  to make this happen. If your upload is interrupted you can restart the
 | 
| +  upload by running the same cp command that you ran to start the upload.
 | 
| +
 | 
| +  Similarly, gsutil automatically performs resumable downloads (using HTTP
 | 
| +  standard Range GET operations) whenever you use the cp command to download an
 | 
| +  object larger than 2 MB.
 | 
| +
 | 
| +  Resumable uploads and downloads store some state information in a file
 | 
| +  in ~/.gsutil named by the destination object or file. If you attempt to
 | 
| +  resume a transfer from a machine with a different directory, the transfer
 | 
| +  will start over from scratch.
 | 
| +
 | 
| +  See also "gsutil help prod" for details on using resumable transfers
 | 
| +  in production.
 | 
| +
 | 
| +
 | 
| +<B>STREAMING TRANSFERS</B>
 | 
| +  Use '-' in place of src_uri or dst_uri to perform a streaming
 | 
| +  transfer. For example:
 | 
| +    long_running_computation | gsutil cp - gs://my_bucket/obj
 | 
| +
 | 
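| +  Similarly, using '-' as the destination writes the object's data to
| 
| +  standard output, e.g. (some_program here is a hypothetical consumer):
| 
| +
| 
| +    gsutil cp gs://my_bucket/obj - | some_program
| 
| +
| 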
| +  Streaming transfers do not support resumable uploads/downloads.
 | 
| +  (The Google resumable transfer protocol has a way to support streaming
 | 
| +  transfers, but gsutil doesn't currently implement support for this.)
 | 
| +
 | 
| +
 | 
| +<B>CHANGING TEMP DIRECTORIES</B>
 | 
| +  gsutil writes data to a temporary directory in several cases:
 | 
| +    - when compressing data to be uploaded (see the -z option)
 | 
| +    - when decompressing data being downloaded (when the data has
 | 
| +      Content-Encoding:gzip, e.g., as happens when uploaded using gsutil cp -z)
 | 
| +    - when running integration tests (using the gsutil test command)
 | 
| +
 | 
| +  In these cases it's possible the temp file location on your system that
 | 
| +  gsutil selects by default may not have enough space. If you find that
 | 
| +  gsutil runs out of space during one of these operations (e.g., raising
 | 
| +  "CommandException: Inadequate temp space available to compress <your file>"
 | 
| +  during a gsutil cp -z operation), you can change where it writes these
 | 
| +  temp files by setting the TMPDIR environment variable. On Linux and MacOS
 | 
| +  you can do this either by running gsutil this way:
 | 
| +
 | 
| +    TMPDIR=/some/directory gsutil cp ...
 | 
| +
 | 
| +  or by adding this line to your ~/.bashrc file and then restarting the shell
 | 
| +  before running gsutil:
 | 
| +
 | 
| +    export TMPDIR=/some/directory
 | 
| +
 | 
| +  On Windows 7 you can change the TMPDIR environment variable from Start ->
 | 
| +  Computer -> System -> Advanced System Settings -> Environment Variables.
 | 
| +  You need to reboot after making this change for it to take effect. (Rebooting
 | 
| +  is not necessary after running the export command on Linux and MacOS.)
 | 
| +
 | 
| +
 | 
| +<B>OPTIONS</B>
 | 
| +  -a canned_acl Sets the named canned_acl when objects are uploaded. See
 | 
| +                'gsutil help acls' for further details.
 | 
| +
 | 
| +  -c            If an error occurs, continue to attempt to copy the remaining
 | 
| +                files.
 | 
| +
 | 
| +  -D            Copy in "daisy chain" mode, i.e., copying between two buckets by
 | 
| +                hooking a download to an upload, via the machine where gsutil is
 | 
| +                run. By default, data are copied between two buckets "in the
 | 
| +                cloud", i.e., without needing to copy via the machine where
 | 
| +                gsutil runs. However, copy-in-the-cloud is not supported when
 | 
| +                copying between different locations (like US and EU) or between
 | 
| +                different storage classes (like STANDARD and
 | 
| +                DURABLE_REDUCED_AVAILABILITY). For these cases, you can use the
 | 
| +                -D option to copy data between buckets.
 | 
| +                Note: Daisy chain mode is automatically used when copying
 | 
| +                between providers (e.g., to copy data from Google Cloud Storage
 | 
| +                to another provider).
 | 
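| +
| 
| +                For example, to copy between buckets in different locations
| 
| +                (bucket names here are hypothetical):
| 
| +
| 
| +                  gsutil cp -D gs://my-us-bucket/obj gs://my-eu-bucket
| 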
| +
 | 
| +  -e            Exclude symlinks. When specified, symbolic links will not be
 | 
| +                copied.
 | 
| +
 | 
| +  -n            No-clobber. When specified, existing files or objects at the
 | 
| +                destination will not be overwritten. Any items that are skipped
 | 
| +                by this option will be reported as being skipped. This option
 | 
| +                will perform an additional HEAD request to check if an item
 | 
| +                exists before attempting to upload the data. This will save
 | 
| +                retransmitting data, but the additional HTTP requests may make
 | 
| +                small object transfers slower and more expensive.
 | 
| +
 | 
| +                This option can be combined with the -c option to build a script
 | 
| +                that copies a large number of objects, allowing retries when
 | 
| +                some failures occur from which gsutil doesn't automatically
 | 
| +                recover, using a bash script like the following:
 | 
| +
 | 
| +                    status=1
 | 
| +                    while [ $status -ne 0 ] ; do
 | 
| +                        gsutil cp -c -n -R ./dir gs://bucket
 | 
| +                        status=$?
 | 
| +                    done
 | 
| +
 | 
| +                The -c option will cause copying to continue after failures
 | 
| +                occur, and the -n option will cause objects already copied to be
 | 
| +                skipped on subsequent iterations. The loop will continue running
 | 
| +                as long as gsutil exits with a non-zero status (such a status
 | 
| +                indicates there was at least one failure during the gsutil run).
 | 
| +
 | 
| +  -p            Causes ACLs to be preserved when copying in the cloud. Note that
 | 
| +                this option has performance and cost implications, because it
 | 
| +                is essentially performing three requests (getacl, cp, setacl).
 | 
| +                (The performance issue can be mitigated to some degree by
 | 
| +                using gsutil -m cp to cause parallel copying.)
 | 
| +
 | 
| +                You can avoid the additional performance and cost of using cp -p
 | 
| +                if you want all objects in the destination bucket to end up with
 | 
| +                the same ACL by setting a default ACL on that bucket instead of
 | 
| +                using cp -p. See "gsutil help setdefacl".
 | 
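| +
| 
| +                For example (assuming a bucket named my_bucket):
| 
| +
| 
| +                  gsutil setdefacl public-read gs://my_bucket
| 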
| +
 | 
| +                Note that it's not valid to specify both the -a and -p options
 | 
| +                together.
 | 
| +
 | 
| +  -q            Causes copies to be performed quietly, i.e., without reporting
 | 
| +                progress indicators of files being copied. Errors are still
 | 
| +                reported. This option can be useful for running gsutil from a
 | 
| +                cron job that logs its output to a file, for which the only
 | 
| +                information desired in the log is failures.
 | 
| +
 | 
| +  -R, -r        Causes directories, buckets, and bucket subdirectories to be
 | 
| +                copied recursively. If you neglect to use this option for
 | 
| +                an upload, gsutil will copy any files it finds and skip any
 | 
| +                directories. Similarly, neglecting to specify -R for a download
 | 
| +                will cause gsutil to copy any objects at the current bucket
 | 
| +                directory level, and skip any subdirectories.
 | 
| +
 | 
| +  -v            Requests that the version-specific URI for each uploaded object
 | 
| +                be printed. Given this URI you can make future upload requests
 | 
| +                that are safe in the face of concurrent updates, because Google
 | 
| +                Cloud Storage will refuse to perform the update if the current
 | 
| +                object version doesn't match the version-specific URI. See
 | 
| +                'gsutil help versioning' for more details. Note: at present this
 | 
| +                option does not work correctly for objects copied "in the cloud"
 | 
| +                (e.g., gsutil cp gs://bucket/obj1 gs://bucket/obj2).
 | 
| +
 | 
| +  -z ext1,...   Compresses file uploads with the given extensions. If you are
 | 
| +                uploading a large file with compressible content, such as
 | 
| +                a .js, .css, or .html file, you can gzip-compress the file
 | 
| +                during the upload process by specifying the -z <extensions>
 | 
| +                option. Compressing data before upload saves on usage charges
 | 
| +                because you are uploading a smaller amount of data.
 | 
| +
 | 
| +                When you specify the -z option, the data from your files is
 | 
| +                compressed before it is uploaded, but your actual files are left
 | 
| +                uncompressed on the local disk. The uploaded objects retain the
 | 
| +                original content type and name as the original files but are
 | 
| +                given a Content-Encoding header with the value "gzip" to
 | 
| +                indicate that the object data stored are compressed on the
 | 
| +                Google Cloud Storage servers.
 | 
| +
 | 
| +                For example, the following command:
 | 
| +
 | 
| +                  gsutil cp -z html -a public-read cattypes.html gs://mycats
 | 
| +
 | 
| +                will do all of the following:
 | 
| +                  - Upload as the object gs://mycats/cattypes.html (cp command)
 | 
| +                  - Set the Content-Type to text/html (based on file extension)
 | 
| +                  - Compress the data in the file cattypes.html (-z option)
 | 
| +                  - Set the Content-Encoding to gzip (-z option)
 | 
| +                  - Set the ACL to public-read (-a option)
 | 
| +                  - If a user tries to view cattypes.html in a browser, the
 | 
| +                    browser will know to uncompress the data based on the
 | 
| +                    Content-Encoding header, and to render it as HTML based on
 | 
| +                    the Content-Type header.
 | 
| +""")
 | 
| +
 | 
| +class CpCommand(Command):
 | 
| +  """
 | 
| +  Implementation of gsutil cp command.
 | 
| +
 | 
| +  Note that CpCommand is run for both gsutil cp and gsutil mv. The latter
 | 
| +  happens by MvCommand calling CpCommand and passing the hidden (undocumented)
 | 
| +  -M option. This allows the copy and remove needed for each mv to run
 | 
| +  together (rather than first running all the cp's and then all the rm's, as
 | 
| +  we originally had implemented), which in turn avoids the following problem
 | 
| +  with removing the wrong objects: starting with a bucket containing only
 | 
| +  the object gs://bucket/obj, say the user does:
 | 
| +    gsutil mv gs://bucket/* gs://bucket/d.txt
 | 
| +  If we ran all the cp's and then all the rm's and we didn't expand the wildcard
 | 
| +  first, the cp command would first copy gs://bucket/obj to gs://bucket/d.txt,
 | 
| +  and the rm command would then remove that object. In the implementation
 | 
| +  prior to gsutil release 3.12 we avoided this by building a list of objects
 | 
| +  to process and then running the copies and then the removes; but building
 | 
| +  the list up front limits scalability (compared with the current approach
 | 
| +  of processing the bucket listing iterator on the fly).
 | 
| +  """
 | 
| +
 | 
| +  # Set default Content-Type type.
 | 
| +  DEFAULT_CONTENT_TYPE = 'application/octet-stream'
 | 
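| +  # If use_magicfile is set in the boto config, Content-Type detection is
| 
| +  # delegated to the 'file --mime-type' command instead of Python's
| 
| +  # mimetypes module (see _SetContentTypeHeader below).
| 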
| +  USE_MAGICFILE = boto.config.getbool('GSUtil', 'use_magicfile', False)
 | 
| +
 | 
| +  # Command specification (processed by parent class).
 | 
| +  command_spec = {
 | 
| +    # Name of command.
 | 
| +    COMMAND_NAME : 'cp',
 | 
| +    # List of command name aliases.
 | 
| +    COMMAND_NAME_ALIASES : ['copy'],
 | 
| +    # Min number of args required by this command.
 | 
| +    MIN_ARGS : 1,
 | 
| +    # Max number of args required by this command, or NO_MAX.
 | 
| +    MAX_ARGS : NO_MAX,
 | 
| +    # Getopt-style string specifying acceptable sub args.
 | 
| +    # -t is deprecated but leave intact for now to avoid breakage.
 | 
| +    SUPPORTED_SUB_ARGS : 'a:cDeIMNnpqrRtvz:',
 | 
| +    # True if file URIs acceptable for this command.
 | 
| +    FILE_URIS_OK : True,
 | 
| +    # True if provider-only URIs acceptable for this command.
 | 
| +    PROVIDER_URIS_OK : False,
 | 
| +    # Index in args of first URI arg.
 | 
| +    URIS_START_ARG : 0,
 | 
| +    # True if must configure gsutil before running command.
 | 
| +    CONFIG_REQUIRED : True,
 | 
| +  }
 | 
| +  help_spec = {
 | 
| +    # Name of command or auxiliary help info for which this help applies.
 | 
| +    HELP_NAME : 'cp',
 | 
| +    # List of help name aliases.
 | 
| +    HELP_NAME_ALIASES : ['copy'],
 | 
| +    # Type of help:
 | 
| +    HELP_TYPE : HelpType.COMMAND_HELP,
 | 
| +    # One line summary of this help.
 | 
| +    HELP_ONE_LINE_SUMMARY : 'Copy files and objects',
 | 
| +    # The full help text.
 | 
| +    HELP_TEXT : _detailed_help_text,
 | 
| +  }
 | 
| +
 | 
| +  def _CheckFinalMd5(self, key, file_name):
 | 
| +    """
 | 
| +    Checks that etag from server agrees with md5 computed after the
 | 
| +    download completes.
 | 
| +    """
 | 
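| +    # The etag from the server is the object's MD5 hex digest wrapped in
| 
| +    # quote characters, so strip the quotes before comparing it with the
| 
| +    # locally computed MD5.
| 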
| +    obj_md5 = key.etag.strip('"\'')
 | 
| +    file_md5 = None
 | 
| +
 | 
| +    if hasattr(key, 'md5') and key.md5:
 | 
| +      file_md5 = key.md5
 | 
| +    else:
 | 
| +      print 'Computing MD5 from scratch for resumed download'
 | 
| +
 | 
| +      # Open file in binary mode to avoid surprises in Windows.
 | 
| +      fp = open(file_name, 'rb')
 | 
| +      try:
 | 
| +        file_md5 = key.compute_md5(fp)[0]
 | 
| +      finally:
 | 
| +        fp.close()
 | 
| +
 | 
| +    if self.debug:
 | 
| +      print 'Checking file md5 against etag. (%s/%s)' % (file_md5, obj_md5)
 | 
| +    if file_md5 != obj_md5:
 | 
| +      # Checksums don't match - remove file and raise exception.
 | 
| +      os.unlink(file_name)
 | 
| +      raise CommandException(
 | 
| +        'File changed during download: md5 signature doesn\'t match '
 | 
| +        'etag (incorrect downloaded file deleted)')
 | 
| +
 | 
| +  def _CheckForDirFileConflict(self, exp_src_uri, dst_uri):
 | 
| +    """Checks whether copying exp_src_uri into dst_uri is not possible.
 | 
| +
 | 
| +       This happens if a directory exists in local file system where a file
 | 
| +       needs to go or vice versa. In that case we print an error message and
 | 
| +       exits. Example: if the file "./x" exists and you try to do:
 | 
| +         gsutil cp gs://mybucket/x/y .
 | 
| +       the request can't succeed because it requires a directory where
 | 
| +       the file x exists.
 | 
| +
 | 
| +       Note that we don't enforce any corresponding restrictions for buckets,
 | 
| +       because the flat namespace semantics for buckets doesn't prohibit such
 | 
| +       cases the way hierarchical file systems do. For example, if a bucket
 | 
| +       contains an object called gs://bucket/dir and then you run the command:
 | 
| +         gsutil cp file1 file2 gs://bucket/dir
 | 
| +       you'll end up with objects gs://bucket/dir, gs://bucket/dir/file1, and
 | 
| +       gs://bucket/dir/file2.
 | 
| +
 | 
| +    Args:
 | 
| +      exp_src_uri: Expanded source StorageUri of copy.
 | 
| +      dst_uri: Destination URI.
 | 
| +
 | 
| +    Raises:
 | 
| +      CommandException: if errors encountered.
 | 
| +    """
 | 
| +    if dst_uri.is_cloud_uri():
 | 
| +      # The problem can only happen for file destination URIs.
 | 
| +      return
 | 
| +    dst_path = dst_uri.object_name
 | 
| +    final_dir = os.path.dirname(dst_path)
 | 
| +    if os.path.isfile(final_dir):
 | 
| +      raise CommandException('Cannot retrieve %s because a file exists '
 | 
| +                             'where a directory needs to be created (%s).' %
 | 
| +                             (exp_src_uri, final_dir))
 | 
| +    if os.path.isdir(dst_path):
 | 
| +      raise CommandException('Cannot retrieve %s because a directory exists '
 | 
| +                             '(%s) where the file needs to be created.' %
 | 
| +                             (exp_src_uri, dst_path))
 | 
| +
 | 
| +  def _InsistDstUriNamesContainer(self, exp_dst_uri,
 | 
| +                                  have_existing_dst_container, command_name):
 | 
| +    """
 | 
| +    Raises an exception if URI doesn't name a directory, bucket, or bucket
 | 
| +    subdir, with special exception for cp -R (see comments below).
 | 
| +
 | 
| +    Args:
 | 
| +      exp_dst_uri: Wildcard-expanded dst_uri.
 | 
| +      have_existing_dst_container: bool indicator of whether exp_dst_uri
 | 
| +        names a container (directory, bucket, or existing bucket subdir).
 | 
| +      command_name: Name of command making call. May not be the same as
 | 
| +          self.command_name in the case of commands implemented atop other
 | 
| +          commands (like mv command).
 | 
| +
 | 
| +    Raises:
 | 
| +      CommandException: if the URI being checked does not name a container.
 | 
| +    """
 | 
| +    if exp_dst_uri.is_file_uri():
 | 
| +      ok = exp_dst_uri.names_directory()
 | 
| +    else:
 | 
| +      if have_existing_dst_container:
 | 
| +        ok = True
 | 
| +      else:
 | 
| +        # It's ok to specify a non-existing bucket subdir, for example:
 | 
| +        #   gsutil cp -R dir gs://bucket/abc
 | 
| +        # where gs://bucket/abc isn't an existing subdir.
 | 
| +        ok = exp_dst_uri.names_object()
 | 
| +    if not ok:
 | 
| +      raise CommandException('Destination URI must name a directory, bucket, '
 | 
| +                             'or bucket\nsubdirectory for the multiple '
 | 
| +                             'source form of the %s command.' % command_name)
 | 
| +
 | 
| +  class _FileCopyCallbackHandler(object):
 | 
| +    """Outputs progress info for large copy requests."""
 | 
| +
 | 
| +    def __init__(self, upload):
 | 
| +      if upload:
 | 
| +        self.announce_text = 'Uploading'
 | 
| +      else:
 | 
| +        self.announce_text = 'Downloading'
 | 
| +
 | 
| +    def call(self, total_bytes_transferred, total_size):
 | 
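| +      # The trailing '\r' (and padding spaces) make each progress update
| 
| +      # overwrite the previous one in place on the terminal.
| 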
| +      sys.stderr.write('%s: %s/%s    \r' % (
 | 
| +          self.announce_text,
 | 
| +          MakeHumanReadable(total_bytes_transferred),
 | 
| +          MakeHumanReadable(total_size)))
 | 
| +      if total_bytes_transferred == total_size:
 | 
| +        sys.stderr.write('\n')
 | 
| +
 | 
| +  class _StreamCopyCallbackHandler(object):
 | 
| +    """Outputs progress info for Stream copy to cloud.
 | 
| +       Total Size of the stream is not known, so we output
 | 
| +       only the bytes transferred.
 | 
| +    """
 | 
| +
 | 
| +    def call(self, total_bytes_transferred, total_size):
 | 
| +      sys.stderr.write('Uploading: %s    \r' % (
 | 
| +          MakeHumanReadable(total_bytes_transferred)))
 | 
| +      if total_size and total_bytes_transferred == total_size:
 | 
| +        sys.stderr.write('\n')
 | 
| +
 | 
| +  def _GetTransferHandlers(self, dst_uri, size, upload):
 | 
| +    """
 | 
| +    Selects upload/download and callback handlers.
 | 
| +
 | 
| +    We use a callback handler that shows a simple textual progress indicator
 | 
| +    if size is above the configurable threshold.
 | 
| +
 | 
| +    We use a resumable transfer handler if size is >= the configurable
 | 
| +    threshold and resumable transfers are supported by the given provider.
 | 
| +    boto supports resumable downloads for all providers, but resumable
 | 
| +    uploads are currently only supported by GS.
 | 
| +
 | 
| +    Args:
 | 
| +      dst_uri: the destination URI.
 | 
| +      size: size of file (object) being uploaded (downloaded).
 | 
| +      upload: bool indication of whether transfer is an upload.
 | 
| +    """
 | 
| +    config = boto.config
 | 
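| +    # The threshold can be tuned via the boto config file, e.g.:
| 
| +    #   [GSUtil]
| 
| +    #   resumable_threshold = 2097152
| 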
| +    resumable_threshold = config.getint('GSUtil', 'resumable_threshold', TWO_MB)
 | 
| +    transfer_handler = None
 | 
| +    cb = None
 | 
| +    num_cb = None
 | 
| +
 | 
| +    # Checks whether the destination file is a "special" file, like /dev/null on
 | 
| +    # Linux platforms or null on Windows platforms, so we can disable resumable
 | 
| +    # download support since the file size of the destination won't ever be
 | 
| +    # correct.
 | 
| +    dst_is_special = False
 | 
| +    if dst_uri.is_file_uri():
 | 
| +      # Check explicitly first because os.stat doesn't work on 'nul' in Windows.
 | 
| +      if dst_uri.object_name == os.devnull:
 | 
| +        dst_is_special = True
 | 
| +      try:
 | 
| +        mode = os.stat(dst_uri.object_name).st_mode
 | 
| +        if stat.S_ISCHR(mode):
 | 
| +          dst_is_special = True
 | 
| +      except OSError:
 | 
| +        pass
 | 
| +
 | 
| +    if size >= resumable_threshold and not dst_is_special:
 | 
| +      if not self.quiet:
 | 
| +        cb = self._FileCopyCallbackHandler(upload).call
 | 
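| +        # Request roughly one progress callback per 2 MB transferred.
| 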
| +        num_cb = int(size / TWO_MB)
 | 
| +
 | 
| +      resumable_tracker_dir = config.get(
 | 
| +          'GSUtil', 'resumable_tracker_dir',
 | 
| +          os.path.expanduser('~' + os.sep + '.gsutil'))
 | 
| +      if not os.path.exists(resumable_tracker_dir):
 | 
| +        os.makedirs(resumable_tracker_dir)
 | 
| +
 | 
| +      if upload:
 | 
| +        # Encode the dest bucket and object name into the tracker file name.
 | 
| +        res_tracker_file_name = (
 | 
| +            re.sub('[/\\\\]', '_', 'resumable_upload__%s__%s.url' %
 | 
| +                   (dst_uri.bucket_name, dst_uri.object_name)))
 | 
| +      else:
 | 
| +        # Encode the fully-qualified dest file name into the tracker file name.
 | 
| +        res_tracker_file_name = (
 | 
| +            re.sub('[/\\\\]', '_', 'resumable_download__%s.etag' %
 | 
| +                   (os.path.realpath(dst_uri.object_name))))
 | 
| +
 | 
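| +      # Hash the tracker file name to keep it within OS filename length
| 
| +      # limits even when the encoded bucket/object or file path is long.
| 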
| +      res_tracker_file_name = _hash_filename(res_tracker_file_name)
 | 
| +      tracker_file = '%s%s%s' % (resumable_tracker_dir, os.sep,
 | 
| +                                 res_tracker_file_name)
 | 
| +      if upload:
 | 
| +        if dst_uri.scheme == 'gs':
 | 
| +          transfer_handler = ResumableUploadHandler(tracker_file)
 | 
| +      else:
 | 
| +        transfer_handler = ResumableDownloadHandler(tracker_file)
 | 
| +
 | 
| +    return (cb, num_cb, transfer_handler)
 | 
| +
 | 
| +  def _LogCopyOperation(self, src_uri, dst_uri, headers):
 | 
| +    """
 | 
| +    Logs copy operation being performed, including Content-Type if appropriate.
 | 
| +    """
 | 
| +    if self.quiet:
 | 
| +      return
 | 
| +    if 'Content-Type' in headers and dst_uri.is_cloud_uri():
 | 
| +      content_type_msg = ' [Content-Type=%s]' % headers['Content-Type']
 | 
| +    else:
 | 
| +      content_type_msg = ''
 | 
| +    if src_uri.is_stream():
 | 
| +      self.THREADED_LOGGER.info('Copying from <STDIN>%s...', content_type_msg)
 | 
| +    else:
 | 
| +      self.THREADED_LOGGER.info('Copying %s%s...', src_uri, content_type_msg)
 | 
| +
 | 
| +  # We pass the headers explicitly to this call instead of using self.headers
 | 
| +  # so we can set different metadata (like Content-Type type) for each object.
 | 
| +  def _CopyObjToObjInTheCloud(self, src_key, src_uri, dst_uri, headers):
 | 
| +    """Performs copy-in-the cloud from specified src to dest object.
 | 
| +
 | 
| +    Args:
 | 
| +      src_key: Source Key.
 | 
| +      src_uri: Source StorageUri.
 | 
| +      dst_uri: Destination StorageUri.
 | 
| +      headers: A copy of the headers dictionary.
 | 
| +
 | 
| +    Returns:
 | 
| +      (elapsed_time, bytes_transferred, dst_uri) excluding overhead like initial
 | 
| +      HEAD. Note: At present copy-in-the-cloud doesn't return the generation of
 | 
| +      the created object, so the returned URI is actually not version-specific
 | 
| +      (unlike other cp cases).
 | 
| +
 | 
| +    Raises:
 | 
| +      CommandException: if errors encountered.
 | 
| +    """
 | 
| +    self._SetContentTypeHeader(src_uri, headers)
 | 
| +    self._LogCopyOperation(src_uri, dst_uri, headers)
 | 
| +    # Do Object -> object copy within same provider (uses
 | 
| +    # x-<provider>-copy-source metadata HTTP header to request copying at the
 | 
| +    # server).
 | 
| +    src_bucket = src_uri.get_bucket(False, headers)
 | 
| +    preserve_acl = False
 | 
| +    canned_acl = None
 | 
| +    if self.sub_opts:
 | 
| +      for o, a in self.sub_opts:
 | 
| +        if o == '-a':
 | 
| +          canned_acls = dst_uri.canned_acls()
 | 
| +          if a not in canned_acls:
 | 
| +            raise CommandException('Invalid canned ACL "%s".' % a)
 | 
| +          canned_acl = a
 | 
| +          headers[dst_uri.get_provider().acl_header] = canned_acl
 | 
| +        if o == '-p':
 | 
| +          preserve_acl = True
 | 
| +    if preserve_acl and canned_acl:
 | 
| +      raise CommandException(
 | 
| +          'Specifying both the -p and -a options together is invalid.')
 | 
| +    start_time = time.time()
 | 
| +    # Pass headers in headers param not metadata param, so boto will copy
 | 
| +    # existing key's metadata and just set the additional headers specified
 | 
| +    # in the headers param (rather than using the headers to override existing
 | 
| +    # metadata). In particular this allows us to copy the existing key's
 | 
| +    # Content-Type and other metadata users need while still being able to
 | 
| +    # set headers the API needs (like x-goog-project-id). Note that this means
 | 
| +    # you can't do something like:
 | 
| +    #   gsutil cp -t Content-Type text/html gs://bucket/* gs://bucket2
 | 
| +    # to change the Content-Type while copying.
 | 
| +
 | 
| +    try:
 | 
| +      dst_key = dst_uri.copy_key(
| 
| +          src_bucket.name, src_uri.object_name, preserve_acl=preserve_acl,
| 
| +          headers=headers, src_version_id=src_uri.version_id,
| 
| +          src_generation=src_uri.generation)
| 
| +    except GSResponseError as e:
| 
| +      exc_name, error_detail = ExtractErrorDetail(e)
| 
| +      if (exc_name == 'GSResponseError'
| 
| +          and ('Copy-in-the-cloud disallowed' in error_detail)):
| 
| +        raise CommandException('%s.\nNote: you can copy between locations '
| 
| +                               'and between storage classes by using the '
| 
| +                               'gsutil cp -D option.' % error_detail)
| 
| +      else:
| 
| +        raise
 | 
| +    end_time = time.time()
 | 
| +    return (end_time - start_time, src_key.size,
 | 
| +            dst_uri.clone_replace_key(dst_key))
 | 
| +
 | 
| +  def _CheckFreeSpace(self, path):
 | 
| +    """Return path/drive free space (in bytes)."""
 | 
| +    if platform.system() == 'Windows':
 | 
| +      from ctypes import c_char_p, c_int, c_uint64, c_wchar_p, windll, POINTER, WINFUNCTYPE, WinError
 | 
| +      try:
 | 
| +        GetDiskFreeSpaceEx = WINFUNCTYPE(c_int, c_wchar_p, POINTER(c_uint64),
 | 
| +                                         POINTER(c_uint64), POINTER(c_uint64))
 | 
| +        GetDiskFreeSpaceEx = GetDiskFreeSpaceEx(
 | 
| +            ('GetDiskFreeSpaceExW', windll.kernel32), (
 | 
| +                (1, 'lpszPathName'),
 | 
| +                (2, 'lpFreeUserSpace'),
 | 
| +                (2, 'lpTotalSpace'),
 | 
| +                (2, 'lpFreeSpace'),))
 | 
| +      except AttributeError:
 | 
| +        GetDiskFreeSpaceEx = WINFUNCTYPE(c_int, c_char_p, POINTER(c_uint64),
 | 
| +                                         POINTER(c_uint64), POINTER(c_uint64))
 | 
| +        GetDiskFreeSpaceEx = GetDiskFreeSpaceEx(
 | 
| +            ('GetDiskFreeSpaceExA', windll.kernel32), (
 | 
| +                (1, 'lpszPathName'),
 | 
| +                (2, 'lpFreeUserSpace'),
 | 
| +                (2, 'lpTotalSpace'),
 | 
| +                (2, 'lpFreeSpace'),))
 | 
| +
 | 
| +      def GetDiskFreeSpaceEx_errcheck(result, func, args):
 | 
| +        if not result:
 | 
| +            raise WinError()
 | 
| +        return args[1].value
 | 
| +      GetDiskFreeSpaceEx.errcheck = GetDiskFreeSpaceEx_errcheck
 | 
| +
 | 
| +      return GetDiskFreeSpaceEx(os.getenv('SystemDrive'))
 | 
| +    else:
 | 
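| +      # Available bytes = fundamental block size * free blocks available
| 
| +      # to non-superusers.
| 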
| +      (_, f_frsize, _, _, f_bavail, _, _, _, _, _) = os.statvfs(path)
 | 
| +      return f_frsize * f_bavail
 | 
| +
 | 
| +  def _PerformResumableUploadIfApplies(self, fp, dst_uri, canned_acl, headers):
 | 
| +    """
 | 
| +    Performs resumable upload if supported by provider and file is above
 | 
| +    threshold, else performs non-resumable upload.
 | 
| +
 | 
| +    Returns (elapsed_time, bytes_transferred, version-specific dst_uri).
 | 
| +    """
 | 
| +    start_time = time.time()
 | 
| +    # Determine the file size differently depending on whether fp is a
| 
| +    # wrapper around a Key or an actual file.
 | 
| +    if isinstance(fp, KeyFile):
 | 
| +      file_size = fp.getkey().size
 | 
| +    else:
 | 
| +      file_size = os.path.getsize(fp.name)
 | 
| +    (cb, num_cb, res_upload_handler) = self._GetTransferHandlers(
 | 
| +        dst_uri, file_size, True)
 | 
| +    if dst_uri.scheme == 'gs':
 | 
| +      # Resumable upload protocol is Google Cloud Storage-specific.
 | 
| +      dst_uri.set_contents_from_file(fp, headers, policy=canned_acl,
 | 
| +                                     cb=cb, num_cb=num_cb,
 | 
| +                                     res_upload_handler=res_upload_handler)
 | 
| +    else:
 | 
| +      dst_uri.set_contents_from_file(fp, headers, policy=canned_acl,
 | 
| +                                     cb=cb, num_cb=num_cb)
 | 
| +    if res_upload_handler:
 | 
| +      # ResumableUploadHandler does not update upload_start_point from its
 | 
| +      # initial value of -1 if transferring the whole file, so clamp at 0
 | 
| +      bytes_transferred = file_size - max(
 | 
| +                              res_upload_handler.upload_start_point, 0)
 | 
| +    else:
 | 
| +      bytes_transferred = file_size
 | 
| +    end_time = time.time()
 | 
| +    return (end_time - start_time, bytes_transferred, dst_uri)
 | 
| +
 | 
| +  def _PerformStreamingUpload(self, fp, dst_uri, headers, canned_acl=None):
 | 
| +    """
 | 
| +    Performs a streaming upload to the cloud.
 | 
| +
 | 
| +    Args:
 | 
| +      fp: The file whose contents to upload.
 | 
| +      dst_uri: Destination StorageUri.
 | 
| +      headers: A copy of the headers dictionary.
 | 
| +      canned_acl: Optional canned ACL to set on the object.
 | 
| +
 | 
| +    Returns (elapsed_time, bytes_transferred, version-specific dst_uri).
 | 
| +    """
 | 
| +    start_time = time.time()
 | 
| +
 | 
| +    if self.quiet:
 | 
| +      cb = None
 | 
| +    else:
 | 
| +      cb = self._StreamCopyCallbackHandler().call
 | 
| +    dst_uri.set_contents_from_stream(
 | 
| +        fp, headers, policy=canned_acl, cb=cb)
 | 
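| +    # Some streams (e.g., pipes) don't support tell(), in which case we
| 
| +    # can't report how many bytes were transferred.
| 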
| +    try:
 | 
| +      bytes_transferred = fp.tell()
 | 
| +    except:
 | 
| +      bytes_transferred = 0
 | 
| +
 | 
| +    end_time = time.time()
 | 
| +    return (end_time - start_time, bytes_transferred, dst_uri)
 | 
| +
 | 
| +  def _SetContentTypeHeader(self, src_uri, headers):
 | 
| +    """
 | 
| +    Sets content type header to value specified in '-h Content-Type' option (if
 | 
| +    specified); else sets using Content-Type detection.
 | 
| +    """
 | 
| +    if 'Content-Type' in headers:
 | 
| +      # If empty string specified (i.e., -h "Content-Type:") set header to None,
 | 
| +      # which will inhibit boto from sending the CT header. Otherwise, boto will
 | 
| +      # pass through the user specified CT header.
 | 
| +      if not headers['Content-Type']:
 | 
| +        headers['Content-Type'] = None
 | 
| +      # else we'll keep the value passed in via -h option (not performing
 | 
| +      # content type detection).
 | 
| +    else:
 | 
| +      # Only do content type recognition if src_uri is a file. Object-to-object
 | 
| +      # copies with no -h Content-Type specified re-use the content type of the
 | 
| +      # source object.
 | 
| +      if src_uri.is_file_uri():
 | 
| +        object_name = src_uri.object_name
 | 
| +        content_type = None
 | 
| +        # Streams (denoted by '-') are expected to be 'application/octet-stream'
 | 
| +        # and 'file' would partially consume them.
 | 
| +        if object_name != '-':
 | 
| +          if self.USE_MAGICFILE:
 | 
| +            p = subprocess.Popen(['file', '--mime-type', object_name],
 | 
| +                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 | 
| +            output, error = p.communicate()
 | 
| +            if p.returncode != 0 or error:
 | 
| +              raise CommandException(
 | 
| +                  'Encountered error running "file --mime-type %s" '
 | 
| +                  '(returncode=%d).\n%s' % (object_name, p.returncode, error))
 | 
| +            # Parse output by removing the line delimiter and splitting on the last ': '.
 | 
| +            content_type = output.rstrip().rpartition(': ')[2]
 | 
| +          else:
 | 
| +            content_type = mimetypes.guess_type(object_name)[0]
 | 
| +        if not content_type:
 | 
| +          content_type = self.DEFAULT_CONTENT_TYPE
 | 
| +        headers['Content-Type'] = content_type
 | 
| +
 | 
| +  def _UploadFileToObject(self, src_key, src_uri, dst_uri, headers,
 | 
| +                          should_log=True):
 | 
| +    """Uploads a local file to an object.
 | 
| +
 | 
| +    Args:
 | 
| +      src_key: Source Key. Must be a file Key.
 | 
| +      src_uri: Source StorageUri.
 | 
| +      dst_uri: Destination StorageUri.
 | 
| +      headers: The headers dictionary.
 | 
| +      should_log: bool indicator whether we should log this operation.
 | 
| +    Returns:
 | 
| +      (elapsed_time, bytes_transferred, version-specific dst_uri), excluding
 | 
| +      overhead like initial HEAD.
 | 
| +
 | 
| +    Raises:
 | 
| +      CommandException: if errors encountered.
 | 
| +    """
 | 
| +    gzip_exts = []
 | 
| +    canned_acl = None
 | 
| +    if self.sub_opts:
 | 
| +      for o, a in self.sub_opts:
 | 
| +        if o == '-a':
 | 
| +          canned_acls = dst_uri.canned_acls()
 | 
| +          if a not in canned_acls:
 | 
| +            raise CommandException('Invalid canned ACL "%s".' % a)
 | 
| +          canned_acl = a
 | 
| +        elif o == '-t':
 | 
| +          print('Warning: -t is deprecated, and will be removed in the future. '
 | 
| +                'Content type\ndetection is '
 | 
| +                'now performed by default, unless inhibited by specifying '
 | 
| +                'a\nContent-Type header via the -h option.')
 | 
| +        elif o == '-z':
 | 
| +          gzip_exts = a.split(',')
 | 
| +
 | 
| +    self._SetContentTypeHeader(src_uri, headers)
 | 
| +    if should_log:
 | 
| +      self._LogCopyOperation(src_uri, dst_uri, headers)
 | 
| +
 | 
| +    if 'Content-Language' not in headers:
 | 
| +      content_language = config.get_value('GSUtil', 'content_language')
| 
| +      if content_language:
| 
| +        headers['Content-Language'] = content_language
 | 
| +
 | 
| +    fname_parts = src_uri.object_name.split('.')
 | 
| +    if len(fname_parts) > 1 and fname_parts[-1] in gzip_exts:
 | 
| +      if self.debug:
 | 
| +        print 'Compressing %s (to tmp)...' % src_key
 | 
| +      (gzip_fh, gzip_path) = tempfile.mkstemp()
 | 
| +      gzip_fp = None
 | 
| +      try:
 | 
| +        # Check for temp space. Assume the compressed object is at most 2x
 | 
| +        # the size of the object (normally should compress to smaller than
 | 
| +        # the object)
 | 
| +        if (self._CheckFreeSpace(gzip_path)
 | 
| +            < 2*int(os.path.getsize(src_key.name))):
 | 
| +          raise CommandException('Inadequate temp space available to compress '
 | 
| +                                 '%s' % src_key.name)
 | 
| +        gzip_fp = gzip.open(gzip_path, 'wb')
 | 
| +        gzip_fp.writelines(src_key.fp)
 | 
| +      finally:
 | 
| +        if gzip_fp:
 | 
| +          gzip_fp.close()
 | 
| +        os.close(gzip_fh)
 | 
| +      headers['Content-Encoding'] = 'gzip'
 | 
| +      gzip_fp = open(gzip_path, 'rb')
 | 
| +      try:
 | 
| +        (elapsed_time, bytes_transferred, result_uri) = (
 | 
| +            self._PerformResumableUploadIfApplies(gzip_fp, dst_uri,
 | 
| +                                                  canned_acl, headers))
 | 
| +      finally:
 | 
| +        gzip_fp.close()
 | 
| +      try:
 | 
| +        os.unlink(gzip_path)
 | 
| +      # Windows sometimes complains the temp file is locked when you try to
 | 
| +      # delete it.
 | 
| +      except Exception, e:
 | 
| +        pass
 | 
| +    elif (src_key.is_stream()
 | 
| +          and dst_uri.get_provider().supports_chunked_transfer()):
 | 
| +      (elapsed_time, bytes_transferred, result_uri) = (
 | 
| +          self._PerformStreamingUpload(src_key.fp, dst_uri, headers,
 | 
| +                                       canned_acl))
 | 
| +    else:
 | 
| +      if src_key.is_stream():
 | 
| +        # For providers that don't support chunked transfers, spool the
| 
| +        # stream to a temporary file and upload from there.
 | 
| +        tmp = tempfile.NamedTemporaryFile()
 | 
| +        file_uri = self.suri_builder.StorageUri('file://%s' % tmp.name)
 | 
| +        try:
 | 
| +          file_uri.new_key(False, headers).set_contents_from_file(
 | 
| +              src_key.fp, headers)
 | 
| +          src_key = file_uri.get_key()
 | 
| +        finally:
 | 
| +          file_uri.close()
 | 
| +      try:
 | 
| +        (elapsed_time, bytes_transferred, result_uri) = (
 | 
| +            self._PerformResumableUploadIfApplies(src_key.fp, dst_uri,
 | 
| +                                                  canned_acl, headers))
 | 
| +      finally:
 | 
| +        if src_key.is_stream():
 | 
| +          tmp.close()
 | 
| +        else:
 | 
| +          src_key.close()
 | 
| +
 | 
| +    return (elapsed_time, bytes_transferred, result_uri)
 | 
| +
 | 
| +  def _DownloadObjectToFile(self, src_key, src_uri, dst_uri, headers,
 | 
| +                            should_log=True):
 | 
| +    """Downloads an object to a local file.
 | 
| +
 | 
| +    Args:
 | 
| +      src_key: Source Key. Must be a cloud object Key.
 | 
| +      src_uri: Source StorageUri.
 | 
| +      dst_uri: Destination StorageUri.
 | 
| +      headers: The headers dictionary.
 | 
| +      should_log: bool indicator whether we should log this operation.
 | 
| +    Returns:
 | 
| +      (elapsed_time, bytes_transferred, dst_uri), excluding overhead like
 | 
| +      initial HEAD.
 | 
| +
 | 
| +    Raises:
 | 
| +      CommandException: if errors encountered.
 | 
| +    """
 | 
| +    if should_log:
 | 
| +      self._LogCopyOperation(src_uri, dst_uri, headers)
 | 
| +    (cb, num_cb, res_download_handler) = self._GetTransferHandlers(
 | 
| +        dst_uri, src_key.size, False)
 | 
| +    file_name = dst_uri.object_name
 | 
| +    dir_name = os.path.dirname(file_name)
 | 
| +    if dir_name and not os.path.exists(dir_name):
 | 
| +      # Do dir creation in try block so can ignore case where dir already
 | 
| +      # exists. This is needed to avoid a race condition when running gsutil
 | 
| +      # -m cp.
 | 
| +      try:
 | 
| +        os.makedirs(dir_name)
 | 
| +      except OSError, e:
 | 
| +        if e.errno != errno.EEXIST:
 | 
| +          raise
 | 
| +    # For gzipped objects not named *.gz download to a temp file and unzip.
 | 
| +    if (hasattr(src_key, 'content_encoding')
 | 
| +        and src_key.content_encoding == 'gzip'
 | 
| +        and not file_name.endswith('.gz')):
 | 
| +      # We can't use tempfile.mkstemp() here because we need a predictable
 | 
| +      # filename for resumable downloads.
 | 
| +      download_file_name = '%s_.gztmp' % file_name
 | 
| +      need_to_unzip = True
 | 
| +    else:
 | 
| +      download_file_name = file_name
 | 
| +      need_to_unzip = False
 | 
| +    fp = None
 | 
| +    try:
 | 
| +      if res_download_handler:
 | 
| +        fp = open(download_file_name, 'ab')
 | 
| +      else:
 | 
| +        fp = open(download_file_name, 'wb')
 | 
| +      start_time = time.time()
 | 
| +      src_key.get_contents_to_file(fp, headers, cb=cb, num_cb=num_cb,
 | 
| +                                   res_download_handler=res_download_handler)
 | 
| +      # If a custom test method is defined, call it here. For the copy command,
 | 
| +      # test methods are expected to take one argument: an open file pointer,
 | 
| +      # and are used to perturb the open file during download to exercise
 | 
| +      # download error detection.
 | 
| +      if self.test_method:
 | 
| +        self.test_method(fp)
 | 
| +      end_time = time.time()
 | 
| +    finally:
 | 
| +      if fp:
 | 
| +        fp.close()
 | 
| +
 | 
| +    # Discard the md5 if we are resuming a partial download.
 | 
| +    if res_download_handler and res_download_handler.download_start_point:
 | 
| +      src_key.md5 = None
 | 
| +
 | 
| +    # Verify downloaded file checksum matched source object's checksum.
 | 
| +    self._CheckFinalMd5(src_key, download_file_name)
 | 
| +
 | 
| +    if res_download_handler:
 | 
| +      bytes_transferred = (
 | 
| +          src_key.size - res_download_handler.download_start_point)
 | 
| +    else:
 | 
| +      bytes_transferred = src_key.size
 | 
| +    if need_to_unzip:
 | 
| +      # Log that we're uncompressing if the file is big enough that
 | 
| +      # decompressing would make it look like the transfer "stalled" at the end.
 | 
| +      if not self.quiet and bytes_transferred > 10 * 1024 * 1024:
 | 
| +        self.THREADED_LOGGER.info('Uncompressing downloaded tmp file to %s...',
 | 
| +                                  file_name)
 | 
| +      # Downloaded gzipped file to a filename w/o .gz extension, so unzip.
 | 
| +      f_in = gzip.open(download_file_name, 'rb')
 | 
| +      f_out = open(file_name, 'wb')
 | 
| +      try:
 | 
| +        while True:
 | 
| +          data = f_in.read(8192)
 | 
| +          if not data:
 | 
| +            break
 | 
| +          f_out.write(data)
 | 
| +      finally:
 | 
| +        f_out.close()
 | 
| +        f_in.close()
 | 
| +        os.unlink(download_file_name)
 | 
| +    return (end_time - start_time, bytes_transferred, dst_uri)
 | 
| +
 | 
| +  def _PerformDownloadToStream(self, src_key, src_uri, str_fp, headers):
 | 
| +    (cb, num_cb, res_download_handler) = self._GetTransferHandlers(
 | 
| +                                src_uri, src_key.size, False)
 | 
| +    start_time = time.time()
 | 
| +    src_key.get_contents_to_file(str_fp, headers, cb=cb, num_cb=num_cb)
 | 
| +    end_time = time.time()
| 
| +    bytes_transferred = src_key.size
 | 
| +    return (end_time - start_time, bytes_transferred)
 | 
| +
 | 
| +  def _CopyFileToFile(self, src_key, src_uri, dst_uri, headers):
 | 
| +    """Copies a local file to a local file.
 | 
| +
 | 
| +    Args:
 | 
| +      src_key: Source Key. Must be a file Key.
 | 
| +      src_uri: Source StorageUri.
 | 
| +      dst_uri: Destination StorageUri.
 | 
| +      headers: The headers dictionary.
 | 
| +    Returns:
 | 
| +      (elapsed_time, bytes_transferred, dst_uri), excluding
 | 
| +      overhead like initial HEAD.
 | 
| +
 | 
| +    Raises:
 | 
| +      CommandException: if errors encountered.
 | 
| +    """
 | 
| +    self._LogCopyOperation(src_uri, dst_uri, headers)
 | 
| +    dst_key = dst_uri.new_key(False, headers)
 | 
| +    start_time = time.time()
 | 
| +    dst_key.set_contents_from_file(src_key.fp, headers)
 | 
| +    end_time = time.time()
 | 
| +    return (end_time - start_time, os.path.getsize(src_key.fp.name), dst_uri)
 | 
| +
 | 
| +  def _CopyObjToObjDaisyChainMode(self, src_key, src_uri, dst_uri, headers):
 | 
| +    """Copies from src_uri to dst_uri in "daisy chain" mode.
 | 
| +       See -D OPTION documentation about what daisy chain mode is.
 | 
| +
 | 
| +    Args:
 | 
| +      src_key: Source Key.
 | 
| +      src_uri: Source StorageUri.
 | 
| +      dst_uri: Destination StorageUri.
 | 
| +      headers: A copy of the headers dictionary.
 | 
| +
 | 
| +    Returns:
 | 
| +      (elapsed_time, bytes_transferred, version-specific dst_uri) excluding
 | 
| +      overhead like initial HEAD.
 | 
| +
 | 
| +    Raises:
 | 
| +      CommandException: if errors encountered.
 | 
| +    """
 | 
| +    self._SetContentTypeHeader(src_uri, headers)
 | 
| +    self._LogCopyOperation(src_uri, dst_uri, headers)
 | 
| +    canned_acl = None
 | 
| +    if self.sub_opts:
 | 
| +      for o, a in self.sub_opts:
 | 
| +        if o == '-a':
 | 
| +          canned_acls = dst_uri.canned_acls()
 | 
| +          if a not in canned_acls:
 | 
| +            raise CommandException('Invalid canned ACL "%s".' % a)
 | 
| +          canned_acl = a
 | 
| +        elif o == '-p':
 | 
| +          # We don't attempt to preserve ACLs across providers because
 | 
| +          # GCS and S3 support different ACLs and disjoint principals.
 | 
| +          raise NotImplementedError('Cross-provider cp -p not supported')
 | 
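| +    # Wrap the source Key in a file-like KeyFile so the upload pulls (i.e.,
| 
| +    # downloads) the object data on demand, daisy-chaining the transfer
| 
| +    # through this machine.
| 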
| +    return self._PerformResumableUploadIfApplies(KeyFile(src_key), dst_uri,
 | 
| +                                                 canned_acl, headers)
 | 
| +
 | 
| +  def _PerformCopy(self, src_uri, dst_uri):
 | 
| +    """Performs copy from src_uri to dst_uri, handling various special cases.
 | 
| +
 | 
| +    Args:
 | 
| +      src_uri: Source StorageUri.
 | 
| +      dst_uri: Destination StorageUri.
 | 
| +
 | 
| +    Returns:
 | 
| +      (elapsed_time, bytes_transferred, version-specific dst_uri) excluding
 | 
| +      overhead like initial HEAD.
 | 
| +
 | 
| +    Raises:
 | 
| +      CommandException: if errors encountered.
 | 
| +    """
 | 
| +    # Make a copy of the input headers each time so we can set a different
 | 
| +    # content type for each object.
 | 
| +    if self.headers:
 | 
| +      headers = self.headers.copy()
 | 
| +    else:
 | 
| +      headers = {}
 | 
| +
 | 
| +    src_key = src_uri.get_key(False, headers)
 | 
| +    if not src_key:
 | 
| +      raise CommandException('"%s" does not exist.' % src_uri)
 | 
| +
 | 
| +    # On Windows, stdin is opened as text mode instead of binary which causes
 | 
| +    # problems when piping a binary file, so this switches it to binary mode.
 | 
| +    if IS_WINDOWS and src_uri.is_file_uri() and src_key.is_stream():
 | 
| +      import msvcrt
 | 
| +      msvcrt.setmode(src_key.fp.fileno(), os.O_BINARY)
 | 
| +
 | 
| +    if self.no_clobber:
 | 
| +      # There are two checks to prevent clobbering:
 | 
| +      # 1) The first check is to see if the item already exists at the
 | 
| +      #    destination, and if so to prevent the upload/download from
 | 
| +      #    happening. This is done by the exists() call below.
 | 
| +      # 2) The second check is only relevant if we are writing to gs. We can
 | 
| +      #    enforce that the server only writes the object if it doesn't exist
 | 
| +      #    by specifying the header below. This check only happens at the
 | 
| +      #    server after the complete file has been uploaded. We specify this
 | 
| +      #    header to prevent a race condition where a destination file may
 | 
| +      #    be created after the first check and before the file is fully
 | 
| +      #    uploaded.
 | 
| +      # In order to save on unnecessary uploads/downloads we perform both
 | 
| +      # checks. However, this may come at the cost of additional HTTP calls.
 | 
| +      if dst_uri.exists(headers):
 | 
| +        if not self.quiet:
 | 
| +          self.THREADED_LOGGER.info('Skipping existing item: %s' %
 | 
| +                                    dst_uri.uri)
 | 
| +        return (0, 0, None)
 | 
| +      if dst_uri.is_cloud_uri() and dst_uri.scheme == 'gs':
 | 
| +        headers['x-goog-if-generation-match'] = '0'
 | 
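| +        # Illustrative effect of the header above: the server fails the
 | 
| +        # write with HTTP 412 (Precondition Failed) if the destination object
 | 
| +        # comes into existence between the exists() check and upload
 | 
| +        # completion; _IsNoClobberServerException() recognizes that failure
 | 
| +        # and treats it as a skip rather than an error.
 | 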
| +
 | 
| +    if src_uri.is_cloud_uri() and dst_uri.is_cloud_uri():
 | 
| +      if src_uri.scheme == dst_uri.scheme and not self.daisy_chain:
 | 
| +        return self._CopyObjToObjInTheCloud(src_key, src_uri, dst_uri, headers)
 | 
| +      else:
 | 
| +        return self._CopyObjToObjDaisyChainMode(src_key, src_uri, dst_uri,
 | 
| +                                                headers)
 | 
| +    elif src_uri.is_file_uri() and dst_uri.is_cloud_uri():
 | 
| +      return self._UploadFileToObject(src_key, src_uri, dst_uri, headers)
 | 
| +    elif src_uri.is_cloud_uri() and dst_uri.is_file_uri():
 | 
| +      return self._DownloadObjectToFile(src_key, src_uri, dst_uri, headers)
 | 
| +    elif src_uri.is_file_uri() and dst_uri.is_file_uri():
 | 
| +      return self._CopyFileToFile(src_key, src_uri, dst_uri, headers)
 | 
| +    else:
 | 
| +      raise CommandException('Unexpected src/dest case')
 | 
| +
 | 
| +  def _ExpandDstUri(self, dst_uri_str):
 | 
| +    """
 | 
| +    Expands wildcard if present in dst_uri_str.
 | 
| +
 | 
| +    Args:
 | 
| +      dst_uri_str: String representation of requested dst_uri.
 | 
| +
 | 
| +    Returns:
 | 
| +      (exp_dst_uri, have_existing_dst_container)
 | 
| +      where have_existing_dst_container is a bool indicating whether
 | 
| +      exp_dst_uri names an existing directory, bucket, or bucket subdirectory.
 | 
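| +      For example (illustrative): 'gs://bucket' expands to
 | 
| +      (StorageUri('gs://bucket'), True), since a bucket is an existing
 | 
| +      container, while a URI naming no existing directory, bucket, or
 | 
| +      subdirectory yields have_existing_dst_container=False.
 | 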
| +
 | 
| +    Raises:
 | 
| +      CommandException: if dst_uri_str matched more than 1 URI.
 | 
| +    """
 | 
| +    dst_uri = self.suri_builder.StorageUri(dst_uri_str)
 | 
| +
 | 
| +    # Handle wildcarded dst_uri case.
 | 
| +    if ContainsWildcard(dst_uri):
 | 
| +      blr_expansion = list(self.WildcardIterator(dst_uri))
 | 
| +      if len(blr_expansion) != 1:
 | 
| +        raise CommandException('Destination (%s) must match exactly 1 URI' %
 | 
| +                               dst_uri_str)
 | 
| +      blr = blr_expansion[0]
 | 
| +      uri = blr.GetUri()
 | 
| +      if uri.is_cloud_uri():
 | 
| +        return (uri, uri.names_bucket() or blr.HasPrefix()
 | 
| +                or blr.GetKey().endswith('/'))
 | 
| +      else:
 | 
| +        return (uri, uri.names_directory())
 | 
| +
 | 
| +    # Handle non-wildcarded dst_uri:
 | 
| +    if dst_uri.is_file_uri():
 | 
| +      return (dst_uri, dst_uri.names_directory())
 | 
| +    if dst_uri.names_bucket():
 | 
| +      return (dst_uri, True)
 | 
| +    # For object URIs check 3 cases: (a) if the name ends with '/' treat as a
 | 
| +    # subdir; else, perform a wildcard expansion with dst_uri + "*" and then
 | 
| +    # find if (b) there's a Prefix matching dst_uri, or (c) name is of form
 | 
| +    # dir_$folder$ (and in both these cases also treat dir as a subdir).
 | 
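| +    # E.g. (illustrative): for dst_uri_str 'gs://bucket/dir' we list
 | 
| +    # 'gs://bucket/dir*' and treat the destination as a subdir if the listing
 | 
| +    # contains the prefix 'gs://bucket/dir/' or an object named
 | 
| +    # 'gs://bucket/dir_$folder$' (a folder placeholder some GUI tools create).
 | 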
| +    if dst_uri.is_cloud_uri() and dst_uri_str.endswith('/'):
 | 
| +      return (dst_uri, True)
 | 
| +    blr_expansion = list(self.WildcardIterator(
 | 
| +        '%s*' % dst_uri_str.rstrip(dst_uri.delim)))
 | 
| +    for blr in blr_expansion:
 | 
| +      if blr.GetRStrippedUriString().endswith('_$folder$'):
 | 
| +        return (dst_uri, True)
 | 
| +      if blr.GetRStrippedUriString() == dst_uri_str.rstrip(dst_uri.delim):
 | 
| +        return (dst_uri, blr.HasPrefix())
 | 
| +    return (dst_uri, False)
 | 
| +
 | 
| +  def _ConstructDstUri(self, src_uri, exp_src_uri,
 | 
| +                       src_uri_names_container, src_uri_expands_to_multi,
 | 
| +                       have_multiple_srcs, exp_dst_uri,
 | 
| +                       have_existing_dest_subdir):
 | 
| +    """
 | 
| +    Constructs the destination URI for a given exp_src_uri/exp_dst_uri pair,
 | 
| +    using context-dependent naming rules that mimic Linux cp and mv behavior.
 | 
| +
 | 
| +    Args:
 | 
| +      src_uri: src_uri to be copied.
 | 
| +      exp_src_uri: Single StorageUri from wildcard expansion of src_uri.
 | 
| +      src_uri_names_container: True if src_uri names a container (including
 | 
| +          the case of a wildcard-named bucket subdir, like gs://bucket/abc
 | 
| +          where gs://bucket/abc/* matched some objects). Note that this is
 | 
| +          additional semantics that src_uri.names_container() doesn't
 | 
| +          understand, because the latter only understands StorageUris, not
 | 
| +          wildcards.
 | 
| +      src_uri_expands_to_multi: True if src_uri expanded to multiple URIs.
 | 
| +      have_multiple_srcs: True if this is a multi-source request. This can be
 | 
| +          true if src_uri wildcard-expanded to multiple URIs or if there were
 | 
| +          multiple source URIs in the request.
 | 
| +      exp_dst_uri: the expanded StorageUri requested for the cp destination.
 | 
| +          Final written path is constructed from this plus a context-dependent
 | 
| +          variant of src_uri.
 | 
| +      have_existing_dest_subdir: bool indicator whether dest is an existing
 | 
| +        subdirectory.
 | 
| +
 | 
| +    Returns:
 | 
| +      StorageUri to use for copy.
 | 
| +
 | 
| +    Raises:
 | 
| +      CommandException: if no destination object name is specified and the
 | 
| +      source is a stream.
 | 
| +    """
 | 
| +    if self._ShouldTreatDstUriAsSingleton(
 | 
| +        have_multiple_srcs, have_existing_dest_subdir, exp_dst_uri):
 | 
| +      # We're copying one file or object to one file or object.
 | 
| +      return exp_dst_uri
 | 
| +
 | 
| +    if exp_src_uri.is_stream():
 | 
| +      if exp_dst_uri.names_container():
 | 
| +        raise CommandException('Destination object name needed when '
 | 
| +                               'source is a stream')
 | 
| +      return exp_dst_uri
 | 
| +
 | 
| +    if not self.recursion_requested and not have_multiple_srcs:
 | 
| +      # We're copying one file or object to a subdirectory. Append final comp
 | 
| +      # of exp_src_uri to exp_dst_uri.
 | 
| +      src_final_comp = exp_src_uri.object_name.rpartition(src_uri.delim)[-1]
 | 
| +      return self.suri_builder.StorageUri('%s%s%s' % (
 | 
| +          exp_dst_uri.uri.rstrip(exp_dst_uri.delim), exp_dst_uri.delim,
 | 
| +          src_final_comp))
 | 
| +
 | 
| +    # Else we're copying multiple sources to a directory, bucket, or a bucket
 | 
| +    # "sub-directory".
 | 
| +
 | 
| +    # Ensure exp_dst_uri ends in delim char if we're doing a multi-src copy or
 | 
| +    # a copy to a directory. (The check for copying to a directory needs
 | 
| +    # special-case handling so that the command:
 | 
| +    #   gsutil cp gs://bucket/obj dir
 | 
| +    # will turn into file://dir/ instead of file://dir -- the latter would cause
 | 
| +    # the file "dirobj" to be created.)
 | 
| +    # Note: need to check have_multiple_srcs or src_uri.names_container()
 | 
| +    # because src_uri could be a bucket containing a single object, named
 | 
| +    # as gs://bucket.
 | 
| +    if ((have_multiple_srcs or src_uri.names_container()
 | 
| +         or os.path.isdir(exp_dst_uri.object_name))
 | 
| +        and not exp_dst_uri.uri.endswith(exp_dst_uri.delim)):
 | 
| +      exp_dst_uri = exp_dst_uri.clone_replace_name(
 | 
| +          '%s%s' % (exp_dst_uri.object_name, exp_dst_uri.delim))
 | 
| +
 | 
| +    # Making naming behavior match how things work with local Linux cp and mv
 | 
| +    # operations depends on many factors, including whether the destination is a
 | 
| +    # container, the plurality of the source(s), and whether the mv command is
 | 
| +    # being used:
 | 
| +    # 1. For the "mv" command that specifies a non-existent destination subdir,
 | 
| +    #    renaming should occur at the level of the src subdir, vs appending that
 | 
| +    #    subdir beneath the dst subdir as is done for copying. For example:
 | 
| +    #      gsutil rm -R gs://bucket
 | 
| +    #      gsutil cp -R dir1 gs://bucket
 | 
| +    #      gsutil cp -R dir2 gs://bucket/subdir1
 | 
| +    #      gsutil mv gs://bucket/subdir1 gs://bucket/subdir2
 | 
| +    #    would (if using cp naming behavior) end up with paths like:
 | 
| +    #      gs://bucket/subdir2/subdir1/dir2/.svn/all-wcprops
 | 
| +    #    whereas mv naming behavior should result in:
 | 
| +    #      gs://bucket/subdir2/dir2/.svn/all-wcprops
 | 
| +    # 2. Copying from directories, buckets, or bucket subdirs should result in
 | 
| +    #    objects/files mirroring the source directory hierarchy. For example:
 | 
| +    #      gsutil cp dir1/dir2 gs://bucket
 | 
| +    #    should create the object gs://bucket/dir2/file2 (assuming dir1/dir2
 | 
| +    #    contains file2).
 | 
| +    #    To be consistent with Linux cp behavior, there's one more wrinkle when
 | 
| +    #    working with subdirs: The resulting object names depend on whether the
 | 
| +    #    destination subdirectory exists. For example, if gs://bucket/subdir
 | 
| +    #    exists, the command:
 | 
| +    #      gsutil cp -R dir1/dir2 gs://bucket/subdir
 | 
| +    #    should create objects named like gs://bucket/subdir/dir2/a/b/c. In
 | 
| +    #    contrast, if gs://bucket/subdir does not exist, this same command
 | 
| +    #    should create objects named like gs://bucket/subdir/a/b/c.
 | 
| +    # 3. Copying individual files or objects to dirs, buckets or bucket subdirs
 | 
| +    #    should result in objects/files named by the final source file name
 | 
| +    #    component. Example:
 | 
| +    #      gsutil cp dir1/*.txt gs://bucket
 | 
| +    #    should create the objects gs://bucket/f1.txt and gs://bucket/f2.txt,
 | 
| +    #    assuming dir1 contains f1.txt and f2.txt.
 | 
| +
 | 
| +    if (self.perform_mv and self.recursion_requested
 | 
| +        and src_uri_expands_to_multi and not have_existing_dest_subdir):
 | 
| +      # Case 1. Handle naming rules for bucket subdir mv. Here we want to
 | 
| +      # line up the src_uri against its expansion, to find the base to build
 | 
| +      # the new name. For example, running the command:
 | 
| +      #   gsutil mv gs://bucket/abcd gs://bucket/xyz
 | 
| +      # when processing exp_src_uri=gs://bucket/abcd/123
 | 
| +      # exp_src_uri_tail should become /123
 | 
| +      # Note: mv.py code disallows wildcard specification of source URI.
 | 
| +      exp_src_uri_tail = exp_src_uri.uri[len(src_uri.uri):]
 | 
| +      dst_key_name = '%s/%s' % (exp_dst_uri.object_name.rstrip('/'),
 | 
| +                                exp_src_uri_tail.strip('/'))
 | 
| +      return exp_dst_uri.clone_replace_name(dst_key_name)
 | 
| +
 | 
| +    if src_uri_names_container and not exp_dst_uri.names_file():
 | 
| +      # Case 2. Build dst_key_name from subpath of exp_src_uri past
 | 
| +      # where src_uri ends. For example, for src_uri=gs://bucket/ and
 | 
| +      # exp_src_uri=gs://bucket/src_subdir/obj, dst_key_name should be
 | 
| +      # src_subdir/obj.
 | 
| +      src_uri_path_sans_final_dir = _GetPathBeforeFinalDir(src_uri)
 | 
| +      dst_key_name = exp_src_uri.uri[
 | 
| +         len(src_uri_path_sans_final_dir):].lstrip(src_uri.delim)
 | 
| +      # Handle case where dst_uri is a non-existent subdir.
 | 
| +      if not have_existing_dest_subdir:
 | 
| +        dst_key_name = dst_key_name.partition(src_uri.delim)[-1]
 | 
| +      # Handle special case where src_uri was a directory named with '.' or
 | 
| +      # './', so that running a command like:
 | 
| +      #   gsutil cp -r . gs://dest
 | 
| +      # will produce obj names of the form gs://dest/abc instead of
 | 
| +      # gs://dest/./abc.
 | 
| +      if dst_key_name.startswith('.%s' % os.sep):
 | 
| +        dst_key_name = dst_key_name[2:]
 | 
| +
 | 
| +    else:
 | 
| +      # Case 3.
 | 
| +      dst_key_name = exp_src_uri.object_name.rpartition(src_uri.delim)[-1]
 | 
| +
 | 
| +    if (exp_dst_uri.is_file_uri()
 | 
| +        or self._ShouldTreatDstUriAsBucketSubDir(
 | 
| +            have_multiple_srcs, exp_dst_uri, have_existing_dest_subdir)):
 | 
| +      if exp_dst_uri.object_name.endswith(exp_dst_uri.delim):
 | 
| +        dst_key_name = '%s%s%s' % (
 | 
| +            exp_dst_uri.object_name.rstrip(exp_dst_uri.delim),
 | 
| +            exp_dst_uri.delim, dst_key_name)
 | 
| +      else:
 | 
| +        delim = exp_dst_uri.delim if exp_dst_uri.object_name else ''
 | 
| +        dst_key_name = '%s%s%s' % (exp_dst_uri.object_name, delim, dst_key_name)
 | 
| +
 | 
| +    return exp_dst_uri.clone_replace_name(dst_key_name)
 | 
| +
 | 
| +  def _FixWindowsNaming(self, src_uri, dst_uri):
 | 
| +    """
 | 
| +    Rewrites the destination URI built by _ConstructDstUri() to translate
 | 
| +    Windows pathnames to cloud pathnames if needed.
 | 
| +
 | 
| +    Args:
 | 
| +      src_uri: Source URI to be copied.
 | 
| +      dst_uri: The destination URI built by _ConstructDstUri().
 | 
| +
 | 
| +    Returns:
 | 
| +      StorageUri to use for copy.
 | 
| +    """
 | 
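| +    # Illustrative example (hypothetical paths): on Windows,
 | 
| +    #   gsutil cp -r dir1 gs://bucket
 | 
| +    # can construct names like gs://bucket/dir1\subdir\f, which this method
 | 
| +    # rewrites to gs://bucket/dir1/subdir/f.
 | 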
| +    if (src_uri.is_file_uri() and src_uri.delim == '\\'
 | 
| +        and dst_uri.is_cloud_uri()):
 | 
| +      trans_uri_str = re.sub(r'\\', '/', dst_uri.uri)
 | 
| +      dst_uri = self.suri_builder.StorageUri(trans_uri_str)
 | 
| +    return dst_uri
 | 
| +
 | 
| +  # Command entry point.
 | 
| +  def RunCommand(self):
 | 
| +
 | 
| +    # Inner funcs.
 | 
| +    def _CopyExceptionHandler(e):
 | 
| +      """Simple exception handler to allow post-completion status."""
 | 
| +      self.THREADED_LOGGER.error(str(e))
 | 
| +      self.copy_failure_count += 1
 | 
| +
 | 
| +    def _CopyFunc(name_expansion_result):
 | 
| +      """Worker function for performing the actual copy (and rm, for mv)."""
 | 
| +      if self.perform_mv:
 | 
| +        cmd_name = 'mv'
 | 
| +      else:
 | 
| +        cmd_name = self.command_name
 | 
| +      src_uri = self.suri_builder.StorageUri(
 | 
| +          name_expansion_result.GetSrcUriStr())
 | 
| +      exp_src_uri = self.suri_builder.StorageUri(
 | 
| +          name_expansion_result.GetExpandedUriStr())
 | 
| +      src_uri_names_container = name_expansion_result.NamesContainer()
 | 
| +      src_uri_expands_to_multi = name_expansion_result.SrcUriExpandsToMulti()
 | 
| +      have_multiple_srcs = name_expansion_result.IsMultiSrcRequest()
 | 
| +      have_existing_dest_subdir = (
 | 
| +          name_expansion_result.HaveExistingDstContainer())
 | 
| +
 | 
| +      if src_uri.names_provider():
 | 
| +        raise CommandException(
 | 
| +            'The %s command does not allow provider-only source URIs (%s)' %
 | 
| +            (cmd_name, src_uri))
 | 
| +      if have_multiple_srcs:
 | 
| +        self._InsistDstUriNamesContainer(exp_dst_uri,
 | 
| +                                         have_existing_dst_container,
 | 
| +                                         cmd_name)
 | 
| +
 | 
| +      if self.perform_mv:
 | 
| +        if name_expansion_result.NamesContainer():
 | 
| +          # Use recursion_requested when performing name expansion for the
 | 
| +          # directory mv case so we can determine if any of the source URIs are
 | 
| +          # directories (and then use cp -R and rm -R to perform the move, to
 | 
| +          # match the behavior of Linux mv (which when moving a directory moves
 | 
| +          # all the contained files)).
 | 
| +          self.recursion_requested = True
 | 
| +          # Disallow wildcard src URIs when moving directories, as supporting it
 | 
| +          # would make the name transformation too complex and would also be
 | 
| +          # dangerous (e.g., someone could accidentally move many objects to the
 | 
| +          # wrong name, or accidentally overwrite many objects).
 | 
| +          if ContainsWildcard(src_uri):
 | 
| +            raise CommandException('The mv command disallows naming source '
 | 
| +                                   'directories using wildcards')
 | 
| +
 | 
| +      if (exp_dst_uri.is_file_uri()
 | 
| +          and not os.path.exists(exp_dst_uri.object_name)
 | 
| +          and have_multiple_srcs):
 | 
| +        os.makedirs(exp_dst_uri.object_name)
 | 
| +
 | 
| +      dst_uri = self._ConstructDstUri(src_uri, exp_src_uri,
 | 
| +                                      src_uri_names_container,
 | 
| +                                      src_uri_expands_to_multi,
 | 
| +                                      have_multiple_srcs, exp_dst_uri,
 | 
| +                                      have_existing_dest_subdir)
 | 
| +      dst_uri = self._FixWindowsNaming(src_uri, dst_uri)
 | 
| +
 | 
| +      self._CheckForDirFileConflict(exp_src_uri, dst_uri)
 | 
| +      if self._SrcDstSame(exp_src_uri, dst_uri):
 | 
| +        raise CommandException('%s: "%s" and "%s" are the same file - '
 | 
| +                               'abort.' % (cmd_name, exp_src_uri, dst_uri))
 | 
| +
 | 
| +      if dst_uri.is_cloud_uri() and dst_uri.is_version_specific:
 | 
| +        raise CommandException('%s: a version-specific URI\n(%s)\ncannot be '
 | 
| +                               'the destination for gsutil cp - abort.'
 | 
| +                               % (cmd_name, dst_uri))
 | 
| +
 | 
| +      elapsed_time = bytes_transferred = 0
 | 
| +      result_uri = None
 | 
| +      try:
 | 
| +        (elapsed_time, bytes_transferred, result_uri) = (
 | 
| +            self._PerformCopy(exp_src_uri, dst_uri))
 | 
| +      except Exception, e:
 | 
| +        if self._IsNoClobberServerException(e):
 | 
| +          if not self.quiet:
 | 
| +            self.THREADED_LOGGER.info('Rejected (noclobber): %s' % dst_uri.uri)
 | 
| +        elif self.continue_on_error:
 | 
| +          if not self.quiet:
 | 
| +            self.THREADED_LOGGER.error('Error copying %s: %s' % (src_uri.uri,
 | 
| +              str(e)))
 | 
| +          self.copy_failure_count += 1
 | 
| +        else:
 | 
| +          raise
 | 
| +      if self.print_ver and result_uri:
 | 
| +        # Some cases don't return a version-specific URI (e.g., if destination
 | 
| +        # is a file), and result_uri is None when the copy was skipped or
 | 
| +        # failed with continue_on_error set.
 | 
| +        if hasattr(result_uri, 'version_specific_uri'):
 | 
| +          self.THREADED_LOGGER.info('Created: %s' %
 | 
| +                                    result_uri.version_specific_uri)
 | 
| +        else:
 | 
| +          self.THREADED_LOGGER.info('Created: %s' % result_uri.uri)
 | 
| +
 | 
| +      # TODO: If we ever use -n (noclobber) with -M (move) (not possible today
 | 
| +      # since we call copy internally from move and don't specify the -n flag)
 | 
| +      # we'll need to only remove the source when we have not skipped the
 | 
| +      # destination.
 | 
| +      if self.perform_mv:
 | 
| +        if not self.quiet:
 | 
| +          self.THREADED_LOGGER.info('Removing %s...', exp_src_uri)
 | 
| +        exp_src_uri.delete_key(validate=False, headers=self.headers)
 | 
| +      stats_lock.acquire()
 | 
| +      self.total_elapsed_time += elapsed_time
 | 
| +      self.total_bytes_transferred += bytes_transferred
 | 
| +      stats_lock.release()
 | 
| +
 | 
| +    # Start of RunCommand code.
 | 
| +    self._ParseArgs()
 | 
| +
 | 
| +    self.total_elapsed_time = self.total_bytes_transferred = 0
 | 
| +    if self.args[-1] == '-' or self.args[-1] == 'file://-':
 | 
| +      self._HandleStreamingDownload()
 | 
| +      return 0
 | 
| +
 | 
| +    if self.read_args_from_stdin:
 | 
| +      if len(self.args) != 1:
 | 
| +        raise CommandException('Source URIs cannot be specified with -I option')
 | 
| +      uri_strs = self._StdinIterator()
 | 
| +    else:
 | 
| +      if len(self.args) < 2:
 | 
| +        raise CommandException('Wrong number of arguments for "cp" command.')
 | 
| +      uri_strs = self.args[0:len(self.args)-1]
 | 
| +
 | 
| +    (exp_dst_uri, have_existing_dst_container) = self._ExpandDstUri(
 | 
| +         self.args[-1])
 | 
| +    name_expansion_iterator = NameExpansionIterator(
 | 
| +        self.command_name, self.proj_id_handler, self.headers, self.debug,
 | 
| +        self.bucket_storage_uri_class, uri_strs,
 | 
| +        self.recursion_requested or self.perform_mv,
 | 
| +        have_existing_dst_container)
 | 
| +
 | 
| +    # Use a lock to ensure accurate statistics in the face of
 | 
| +    # multi-threading/multi-processing.
 | 
| +    stats_lock = threading.Lock()
 | 
| +
 | 
| +    # Tracks if any copies failed.
 | 
| +    self.copy_failure_count = 0
 | 
| +
 | 
| +    # Start the clock.
 | 
| +    start_time = time.time()
 | 
| +
 | 
| +    # Tuple of attributes to share/manage across multiple processes in
 | 
| +    # parallel (-m) mode.
 | 
| +    shared_attrs = ('copy_failure_count', 'total_bytes_transferred')
 | 
| +
 | 
| +    # Perform copy requests in parallel (-m) mode, if requested, using
 | 
| +    # configured number of parallel processes and threads. Otherwise,
 | 
| +    # perform requests with sequential function calls in current process.
 | 
| +    self.Apply(_CopyFunc, name_expansion_iterator, _CopyExceptionHandler,
 | 
| +               shared_attrs)
 | 
| +    if self.debug:
 | 
| +      print 'total_bytes_transferred:' + str(self.total_bytes_transferred)
 | 
| +
 | 
| +    end_time = time.time()
 | 
| +    self.total_elapsed_time = end_time - start_time
 | 
| +
 | 
| +    # Sometimes, particularly when running unit tests, the total elapsed time
 | 
| +    # is really small. On Windows the timer resolution is coarse enough that
 | 
| +    # total_elapsed_time can come out as zero.
 | 
| +    try:
 | 
| +      float(self.total_bytes_transferred) / float(self.total_elapsed_time)
 | 
| +    except ZeroDivisionError:
 | 
| +      self.total_elapsed_time = 0.01
 | 
| +
 | 
| +    self.total_bytes_per_second = (float(self.total_bytes_transferred) /
 | 
| +                                   float(self.total_elapsed_time))
 | 
| +
 | 
| +    if self.debug == 3:
 | 
| +      # Note that this only counts the actual GET and PUT bytes for the copy
 | 
| +      # - not any transfers for doing wildcard expansion, the initial HEAD
 | 
| +      # request boto performs when doing a bucket.get_key() operation, etc.
 | 
| +      if self.total_bytes_transferred != 0:
 | 
| +        self.THREADED_LOGGER.info(
 | 
| +            'Total bytes copied=%d, total elapsed time=%5.3f secs (%sps)',
 | 
| +            self.total_bytes_transferred, self.total_elapsed_time,
 | 
| +            MakeHumanReadable(self.total_bytes_per_second))
 | 
| +    if self.copy_failure_count:
 | 
| +      plural_str = ''
 | 
| +      if self.copy_failure_count > 1:
 | 
| +        plural_str = 's'
 | 
| +      raise CommandException('%d file%s/object%s could not be transferred.' % (
 | 
| +                             self.copy_failure_count, plural_str, plural_str))
 | 
| +
 | 
| +    return 0
 | 
| +
 | 
| +  def _ParseArgs(self):
 | 
| +    self.perform_mv = False
 | 
| +    self.exclude_symlinks = False
 | 
| +    self.quiet = False
 | 
| +    self.no_clobber = False
 | 
| +    self.continue_on_error = False
 | 
| +    self.daisy_chain = False
 | 
| +    self.read_args_from_stdin = False
 | 
| +    self.print_ver = False
 | 
| +    # self.recursion_requested initialized in command.py (so can be checked
 | 
| +    # in parent class for all commands).
 | 
| +    if self.sub_opts:
 | 
| +      for o, unused_a in self.sub_opts:
 | 
| +        if o == '-c':
 | 
| +          self.continue_on_error = True
 | 
| +        elif o == '-D':
 | 
| +          self.daisy_chain = True
 | 
| +        elif o == '-e':
 | 
| +          self.exclude_symlinks = True
 | 
| +        elif o == '-I':
 | 
| +          self.read_args_from_stdin = True
 | 
| +        elif o == '-M':
 | 
| +          # Note that we signal to the cp command to perform a move (copy
 | 
| +          # followed by remove) and use directory-move naming rules by passing
 | 
| +          # the undocumented (for internal use) -M option when running the cp
 | 
| +          # command from mv.py.
 | 
| +          self.perform_mv = True
 | 
| +        elif o == '-n':
 | 
| +          self.no_clobber = True
 | 
| +        elif o == '-q':
 | 
| +          self.quiet = True
 | 
| +        elif o == '-r' or o == '-R':
 | 
| +          self.recursion_requested = True
 | 
| +        elif o == '-v':
 | 
| +          self.print_ver = True
 | 
| +
 | 
| +  def _HandleStreamingDownload(self):
 | 
| +    # Destination is <STDOUT>. Manipulate sys.stdout so as to redirect all
 | 
| +    # debug messages to <STDERR>.
 | 
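| +    # Illustrative usage: 'gsutil cp gs://bucket/obj -' writes the object to
 | 
| +    # stdout (e.g., for piping into another program) while debug output goes
 | 
| +    # to stderr.
 | 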
| +    stdout_fp = sys.stdout
 | 
| +    sys.stdout = sys.stderr
 | 
| +    did_some_work = False
 | 
| +    for uri_str in self.args[0:len(self.args)-1]:
 | 
| +      for uri in self.WildcardIterator(uri_str).IterUris():
 | 
| +        did_some_work = True
 | 
| +        key = uri.get_key(False, self.headers)
 | 
| +        (elapsed_time, bytes_transferred) = self._PerformDownloadToStream(
 | 
| +            key, uri, stdout_fp, self.headers)
 | 
| +        self.total_elapsed_time += elapsed_time
 | 
| +        self.total_bytes_transferred += bytes_transferred
 | 
| +    if not did_some_work:
 | 
| +      raise CommandException('No URIs matched')
 | 
| +    if self.debug == 3:
 | 
| +      if self.total_bytes_transferred != 0:
 | 
| +        self.THREADED_LOGGER.info(
 | 
| +            'Total bytes copied=%d, total elapsed time=%5.3f secs (%sps)',
 | 
| +            self.total_bytes_transferred, self.total_elapsed_time,
 | 
| +            MakeHumanReadable(float(self.total_bytes_transferred) /
 | 
| +                              float(self.total_elapsed_time)))
 | 
| +
 | 
| +  def _StdinIterator(self):
 | 
| +    """A generator function that returns lines from stdin."""
 | 
| +    for line in sys.stdin:
 | 
| +      # Strip CRLF.
 | 
| +      yield line.rstrip()
 | 
| +
 | 
| +  def _SrcDstSame(self, src_uri, dst_uri):
 | 
| +    """Checks if src_uri and dst_uri represent the same object or file.
 | 
| +
 | 
| +    We make no attempt to handle hard or symbolic links.
 | 
| +
 | 
| +    Args:
 | 
| +      src_uri: Source StorageUri.
 | 
| +      dst_uri: Destination StorageUri.
 | 
| +
 | 
| +    Returns:
 | 
| +      Bool indicator.
 | 
| +    """
 | 
| +    if src_uri.is_file_uri() and dst_uri.is_file_uri():
 | 
| +      # Translate a/b/./c to a/b/c, so src=dst comparison below works.
 | 
| +      new_src_path = os.path.normpath(src_uri.object_name)
 | 
| +      new_dst_path = os.path.normpath(dst_uri.object_name)
 | 
| +      return (src_uri.clone_replace_name(new_src_path).uri ==
 | 
| +              dst_uri.clone_replace_name(new_dst_path).uri)
 | 
| +    else:
 | 
| +      return (src_uri.uri == dst_uri.uri and
 | 
| +              src_uri.generation == dst_uri.generation and
 | 
| +              src_uri.version_id == dst_uri.version_id)
 | 
| +
 | 
| +  def _ShouldTreatDstUriAsBucketSubDir(self, have_multiple_srcs, dst_uri,
 | 
| +                                       have_existing_dest_subdir):
 | 
| +    """
 | 
| +    Checks whether dst_uri should be treated as a bucket "sub-directory". The
 | 
| +    decision about whether something constitutes a bucket "sub-directory"
 | 
| +    depends on whether there are multiple sources in this request and whether
 | 
| +    there is an existing bucket subdirectory. For example, when running the
 | 
| +    command:
 | 
| +      gsutil cp file gs://bucket/abc
 | 
| +    if there's no existing gs://bucket/abc bucket subdirectory we should copy
 | 
| +    file to the object gs://bucket/abc. In contrast, if
 | 
| +    there's an existing gs://bucket/abc bucket subdirectory we should copy
 | 
| +    file to gs://bucket/abc/file. And regardless of whether gs://bucket/abc
 | 
| +    exists, when running the command:
 | 
| +      gsutil cp file1 file2 gs://bucket/abc
 | 
| +    we should copy file1 to gs://bucket/abc/file1 (and similarly for file2).
 | 
| +
 | 
| +    Note that we don't disallow naming a bucket "sub-directory" where there's
 | 
| +    already an object at that URI. For example it's legitimate (albeit
 | 
| +    confusing) to have an object called gs://bucket/dir and
 | 
| +    then run the command:
 | 
| +      gsutil cp file1 file2 gs://bucket/dir
 | 
| +    Doing so will end up with objects gs://bucket/dir, gs://bucket/dir/file1,
 | 
| +    and gs://bucket/dir/file2.
 | 
| +
 | 
| +    Args:
 | 
| +      have_multiple_srcs: Bool indicator of whether this is a multi-source
 | 
| +          operation.
 | 
| +      dst_uri: StorageUri to check.
 | 
| +      have_existing_dest_subdir: bool indicator whether dest is an existing
 | 
| +        subdirectory.
 | 
| +
 | 
| +    Returns:
 | 
| +      bool indicator.
 | 
| +    """
 | 
| +    return ((have_multiple_srcs and dst_uri.is_cloud_uri())
 | 
| +            or have_existing_dest_subdir)
 | 
| +
 | 
| +  def _ShouldTreatDstUriAsSingleton(self, have_multiple_srcs,
 | 
| +                                    have_existing_dest_subdir, dst_uri):
 | 
| +    """
 | 
| +    Checks whether dst_uri names a singleton (file or object) after
 | 
| +    dir/wildcard expansion. The decision is more nuanced than simply
 | 
| +    dst_uri.names_singleton() because of the possibility that an object path
 | 
| +    might name a bucket sub-directory.
 | 
| +
 | 
| +    Args:
 | 
| +      have_multiple_srcs: Bool indicator of whether this is a multi-source
 | 
| +          operation.
 | 
| +      have_existing_dest_subdir: bool indicator whether dest is an existing
 | 
| +        subdirectory.
 | 
| +      dst_uri: StorageUri to check.
 | 
| +
 | 
| +    Returns:
 | 
| +      bool indicator.
 | 
| +    """
 | 
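| +    # E.g. (illustrative): for 'gsutil cp f1 f2 gs://bucket/abc' the cloud
 | 
| +    # destination is never treated as a singleton, whereas for
 | 
| +    # 'gsutil cp f1 gs://bucket/abc' it is a singleton only when no
 | 
| +    # gs://bucket/abc subdirectory already exists.
 | 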
| +    if have_multiple_srcs:
 | 
| +      # Only a file meets the criteria in this case.
 | 
| +      return dst_uri.names_file()
 | 
| +    return not have_existing_dest_subdir and dst_uri.names_singleton()
 | 
| +
 | 
| +  def _IsNoClobberServerException(self, e):
 | 
| +    """
 | 
| +    Checks whether an exception occurred because the server refused to
 | 
| +    clobber an existing object, after we specified in the request header
 | 
| +    that we didn't want the object clobbered.
 | 
| +
 | 
| +    Args:
 | 
| +      e: The Exception that was generated by a failed copy operation
 | 
| +
 | 
| +    Returns:
 | 
| +      bool indicator - True indicates the server rejected the request
 | 
| +          because it would have clobbered an existing object.
 | 
| +    """
 | 
| +    return self.no_clobber and (
 | 
| +        (isinstance(e, GSResponseError) and e.status == 412) or
 | 
| +        (isinstance(e, ResumableUploadException) and 'code 412' in e.message))
 | 
| +
 | 
| +def _GetPathBeforeFinalDir(uri):
 | 
| +  """
 | 
| +  Returns the part of the path before the final directory component for the
 | 
| +  given URI, handling cases for file system directories, buckets, and bucket
 | 
| +  subdirectories. Example: for gs://bucket/dir/ we'll return 'gs://bucket',
 | 
| +  and for file://dir we'll return file://
 | 
| +
 | 
| +  Args:
 | 
| +    uri: StorageUri.
 | 
| +
 | 
| +  Returns:
 | 
| +    String name of above-described path, sans final path separator.
 | 
| +  """
 | 
| +  sep = uri.delim
 | 
| +  assert not uri.names_file()
 | 
| +  if uri.names_directory():
 | 
| +    past_scheme = uri.uri[len('file://'):]
 | 
| +    if past_scheme.find(sep) == -1:
 | 
| +      return 'file://'
 | 
| +    else:
 | 
| +      return 'file://%s' % past_scheme.rstrip(sep).rpartition(sep)[0]
 | 
| +  if uri.names_bucket():
 | 
| +    return '%s://' % uri.scheme
 | 
| +  # Else it names a bucket subdir.
 | 
| +  return uri.uri.rstrip(sep).rpartition(sep)[0]
 | 
| +
 | 
| +def _hash_filename(filename):
 | 
| +  """
 | 
| +  Applies a hash function (SHA1) to shorten the passed file name. The spec
 | 
| +  for the hashed file name is as follows:
 | 
| +
 | 
| +      TRACKER_<hash>.<trailing>
 | 
| +
 | 
| +  where hash is a SHA1 hash on the original file name and trailing is
 | 
| +  the last 16 chars from the original file name. Max file name lengths
 | 
| +  vary by operating system so the goal of this function is to ensure
 | 
| +  the hashed version takes fewer than 100 characters.
 | 
| +
 | 
| +  Args:
 | 
| +    filename: file name to be hashed.
 | 
| +
 | 
| +  Returns:
 | 
| +    A shorter, hashed version of the passed file name.
 | 
| +  """
 | 
| +  if isinstance(filename, unicode):
 | 
| +    filename = filename.encode('utf-8')
 | 
| +  else:
 | 
| +    # Round-trip to validate that the name is well-formed UTF-8.
 | 
| +    filename = unicode(filename, 'utf8').encode('utf-8')
 | 
| +  m = hashlib.sha1(filename)
 | 
| +  return 'TRACKER_' + m.hexdigest() + '.' + filename[-16:]
 | 
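| +# Illustrative usage (hypothetical name): _hash_filename('/long/tracker/name')
 | 
| +# yields 'TRACKER_' + the 40-hex-char SHA1 digest of the full name + '.' +
 | 
| +# its last 16 characters, keeping the result well under 100 characters.
 | 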
| 
 |