OLD | NEW |
(Empty) | |
| 1 # -*- coding: utf-8 -*- |
| 2 # Copyright 2011 Google Inc. All Rights Reserved. |
| 3 # Copyright 2011, Nexenta Systems Inc. |
| 4 # |
| 5 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 # you may not use this file except in compliance with the License. |
| 7 # You may obtain a copy of the License at |
| 8 # |
| 9 # http://www.apache.org/licenses/LICENSE-2.0 |
| 10 # |
| 11 # Unless required by applicable law or agreed to in writing, software |
| 12 # distributed under the License is distributed on an "AS IS" BASIS, |
| 13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 # See the License for the specific language governing permissions and |
| 15 # limitations under the License. |
| 16 """Implementation of Unix-like cp command for cloud storage providers.""" |
| 17 |
| 18 from __future__ import absolute_import |
| 19 |
| 20 import os |
| 21 import time |
| 22 import traceback |
| 23 |
| 24 from gslib import copy_helper |
| 25 from gslib.cat_helper import CatHelper |
| 26 from gslib.cloud_api import AccessDeniedException |
| 27 from gslib.cloud_api import NotFoundException |
| 28 from gslib.command import Command |
| 29 from gslib.command_argument import CommandArgument |
| 30 from gslib.commands.compose import MAX_COMPONENT_COUNT |
| 31 from gslib.copy_helper import CreateCopyHelperOpts |
| 32 from gslib.copy_helper import ItemExistsError |
| 33 from gslib.copy_helper import Manifest |
| 34 from gslib.copy_helper import PARALLEL_UPLOAD_TEMP_NAMESPACE |
| 35 from gslib.copy_helper import SkipUnsupportedObjectError |
| 36 from gslib.cs_api_map import ApiSelector |
| 37 from gslib.exception import CommandException |
| 38 from gslib.name_expansion import NameExpansionIterator |
| 39 from gslib.storage_url import ContainsWildcard |
| 40 from gslib.util import CreateLock |
| 41 from gslib.util import GetCloudApiInstance |
| 42 from gslib.util import IsCloudSubdirPlaceholder |
| 43 from gslib.util import MakeHumanReadable |
| 44 from gslib.util import NO_MAX |
| 45 from gslib.util import RemoveCRLFFromString |
| 46 from gslib.util import StdinIterator |
| 47 |
| 48 _SYNOPSIS = """ |
| 49 gsutil cp [OPTION]... src_url dst_url |
| 50 gsutil cp [OPTION]... src_url... dst_url |
| 51 gsutil cp [OPTION]... -I dst_url |
| 52 """ |
| 53 |
| 54 _SYNOPSIS_TEXT = """ |
| 55 <B>SYNOPSIS</B> |
| 56 """ + _SYNOPSIS |
| 57 |
| 58 _DESCRIPTION_TEXT = """ |
| 59 <B>DESCRIPTION</B> |
| 60 The gsutil cp command allows you to copy data between your local file |
| 61 system and the cloud, copy data within the cloud, and copy data between |
| 62 cloud storage providers. For example, to copy all text files from the |
| 63 local directory to a bucket you could do: |
| 64 |
| 65 gsutil cp *.txt gs://my_bucket |
| 66 |
| 67 Similarly, you can download text files from a bucket by doing: |
| 68 |
| 69 gsutil cp gs://my_bucket/*.txt . |
| 70 |
| 71 If you want to copy an entire directory tree you need to use the -r option: |
| 72 |
| 73 gsutil cp -r dir gs://my_bucket |
| 74 |
| 75 If you have a large number of files to upload you might want to use the |
| 76 gsutil -m option, to perform a parallel (multi-threaded/multi-processing) |
| 77 copy: |
| 78 |
| 79 gsutil -m cp -r dir gs://my_bucket |
| 80 |
| 81 You can pass a list of URLs (one per line) to copy on stdin instead of as |
| 82 command line arguments by using the -I option. This allows you to use gsutil |
| 83 in a pipeline to upload or download files / objects as generated by a program, |
| 84 such as: |
| 85 |
| 86 some_program | gsutil -m cp -I gs://my_bucket |
| 87 |
| 88 or: |
| 89 |
| 90 some_program | gsutil -m cp -I ./download_dir |
| 91 |
| 92 The contents of stdin can name files, cloud URLs, and wildcards of files |
| 93 and cloud URLs. |
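| |
| For example, if the URLs to copy are kept in a file (filelist.txt here is just |
| an illustrative name), you could run: |
| |
| cat filelist.txt | gsutil -m cp -I gs://my_bucket |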
| 94 """ |
| 95 |
| 96 _NAME_CONSTRUCTION_TEXT = """ |
| 97 <B>HOW NAMES ARE CONSTRUCTED</B> |
| 98 The gsutil cp command strives to name objects in a way consistent with how |
| 99 Linux cp works, which causes names to be constructed in varying ways depending |
| 100 on whether you're performing a recursive directory copy or copying |
| 101 individually named objects; and whether you're copying to an existing or |
| 102 non-existent directory. |
| 103 |
| 104 When performing recursive directory copies, object names are constructed |
| 105 that mirror the source directory structure starting at the point of |
| 106 recursive processing. For example, the command: |
| 107 |
| 108 gsutil cp -r dir1/dir2 gs://my_bucket |
| 109 |
| 110 will create objects named like gs://my_bucket/dir2/a/b/c, assuming |
| 111 dir1/dir2 contains the file a/b/c. |
| 112 |
| 113 In contrast, copying individually named files will result in objects named |
| 114 by the final path component of the source files. For example, the command: |
| 115 |
| 116 gsutil cp dir1/dir2/** gs://my_bucket |
| 117 |
| 118 will create objects named like gs://my_bucket/c. |
| 119 |
| 120 The same rules apply for downloads: recursive copies of buckets and |
| 121 bucket subdirectories produce a mirrored filename structure, while copying |
| 122 individually (or wildcard) named objects produces flatly named files. |
| 123 |
| 124 Note that in the above example the '**' wildcard matches all names |
| 125 anywhere under dir1/dir2. The wildcard '*' will match names just one level deep. For |
| 126 more details see 'gsutil help wildcards'. |
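| |
| For example, without the -r option the single-level wildcard |
| |
| gsutil cp dir1/dir2/* gs://my_bucket |
| |
| copies just the files directly under dir1/dir2 (skipping its subdirectories) |
| and creates objects named like gs://my_bucket/<filename>. |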
| 127 |
| 128 There's an additional wrinkle when working with subdirectories: the resulting |
| 129 names depend on whether the destination subdirectory exists. For example, |
| 130 if gs://my_bucket/subdir exists as a subdirectory, the command: |
| 131 |
| 132 gsutil cp -r dir1/dir2 gs://my_bucket/subdir |
| 133 |
| 134 will create objects named like gs://my_bucket/subdir/dir2/a/b/c. In contrast, |
| 135 if gs://my_bucket/subdir does not exist, this same gsutil cp command will |
| 136 create objects named like gs://my_bucket/subdir/a/b/c. |
| 137 |
| 138 Note: If you use the |
| 139 `Google Developers Console <https://console.developers.google.com>`_ |
| 140 to create folders, it does so by creating a "placeholder" object that ends |
| 141 with a "/" character. gsutil skips these objects when downloading from the |
| 142 cloud to the local file system, because attempting to create a file that |
| 143 ends with a "/" is not allowed on Linux and MacOS. Because of this, it is |
| 144 recommended that you not create objects that end with "/" (unless you don't |
| 145 need to be able to download such objects using gsutil). |
| 146 """ |
| 147 |
| 148 _SUBDIRECTORIES_TEXT = """ |
| 149 <B>COPYING TO/FROM SUBDIRECTORIES; DISTRIBUTING TRANSFERS ACROSS MACHINES</B> |
| 150 You can use gsutil to copy to and from subdirectories by using a command |
| 151 like: |
| 152 |
| 153 gsutil cp -r dir gs://my_bucket/data |
| 154 |
| 155 This will cause dir and all of its files and nested subdirectories to be |
| 156 copied under the specified destination, resulting in objects with names like |
| 157 gs://my_bucket/data/dir/a/b/c. Similarly you can download from bucket |
| 158 subdirectories by using a command like: |
| 159 |
| 160 gsutil cp -r gs://my_bucket/data dir |
| 161 |
| 162 This will cause everything nested under gs://my_bucket/data to be downloaded |
| 163 into dir, resulting in files with names like dir/data/a/b/c. |
| 164 |
| 165 Copying subdirectories is useful if you want to add data to an existing |
| 166 bucket directory structure over time. It's also useful if you want |
| 167 to parallelize uploads and downloads across multiple machines (often |
| 168 reducing overall transfer time compared with simply running gsutil -m |
| 169 cp on one machine). For example, if your bucket contains this structure: |
| 170 |
| 171 gs://my_bucket/data/result_set_01/ |
| 172 gs://my_bucket/data/result_set_02/ |
| 173 ... |
| 174 gs://my_bucket/data/result_set_99/ |
| 175 |
| 176 you could perform concurrent downloads across 3 machines by running these |
| 177 commands on each machine, respectively: |
| 178 |
| 179 gsutil -m cp -r gs://my_bucket/data/result_set_[0-3]* dir |
| 180 gsutil -m cp -r gs://my_bucket/data/result_set_[4-6]* dir |
| 181 gsutil -m cp -r gs://my_bucket/data/result_set_[7-9]* dir |
| 182 |
| 183 Note that dir could be a local directory on each machine, or it could |
| 184 be a directory mounted off of a shared file server; whether the latter |
| 185 performs acceptably may depend on a number of things, so we recommend |
| 186 you experiment and find out what works best for you. |
| 187 """ |
| 188 |
| 189 _COPY_IN_CLOUD_TEXT = """ |
| 190 <B>COPYING IN THE CLOUD AND METADATA PRESERVATION</B> |
| 191 If both the source and destination URL are cloud URLs from the same |
| 192 provider, gsutil copies data "in the cloud" (i.e., without downloading |
| 193 to and uploading from the machine where you run gsutil). In addition to |
| 194 the performance and cost advantages of doing this, copying in the cloud |
| 195 preserves metadata (like Content-Type and Cache-Control). In contrast, |
| 196 when you download data from the cloud it ends up in a file, which has |
| 197 no associated metadata. Thus, unless you have some way to hold on to |
| 198 or re-create that metadata, downloading to a file will not retain the |
| 199 metadata. |
| 200 |
| 201 Copies spanning locations and/or storage classes cause data to be rewritten |
| 202 in the cloud, which may take some time. Such operations can be resumed with |
| 203 the same command if they are interrupted, so long as the command parameters |
| 204 are identical. |
| 205 |
| 206 Note that by default, the gsutil cp command does not copy the object |
| 207 ACL to the new object, and instead will use the default bucket ACL (see |
| 208 "gsutil help defacl"). You can override this behavior with the -p |
| 209 option (see OPTIONS below). |
| 210 |
| 211 One additional note about copying in the cloud: If the destination bucket has |
| 212 versioning enabled, gsutil cp will copy all versions of the source object(s). |
| 213 For example: |
| 214 |
| 215 gsutil cp gs://bucket1/obj gs://bucket2 |
| 216 |
| 217 will cause all versions of gs://bucket1/obj to be copied to gs://bucket2. |
| 218 """ |
| 219 |
| 220 _CHECKSUM_VALIDATION_TEXT = """ |
| 221 <B>CHECKSUM VALIDATION</B> |
| 222 At the end of every upload or download the gsutil cp command validates that |
| 223 the checksum it computes for the source file/object matches the checksum |
| 224 the service computes. If the checksums do not match, gsutil will delete the |
| 225 corrupted object and print a warning message. This very rarely happens, but |
| 226 if it does, please contact gs-team@google.com. |
| 227 |
| 228 If you know the MD5 of a file before uploading you can specify it in the |
| 229 Content-MD5 header, which will cause the cloud storage service to reject the |
| 230 upload if the MD5 doesn't match the value computed by the service. For |
| 231 example: |
| 232 |
| 233 % gsutil hash obj |
| 234 Hashing obj: |
| 235 Hashes [base64] for obj: |
| 236 Hash (crc32c): lIMoIw== |
| 237 Hash (md5): VgyllJgiiaRAbyUUIqDMmw== |
| 238 |
| 239 % gsutil -h Content-MD5:VgyllJgiiaRAbyUUIqDMmw== cp obj gs://your-bucket/obj |
| 240 Copying file://obj [Content-Type=text/plain]... |
| 241 Uploading gs://your-bucket/obj: 182 b/182 B |
| 242 |
| 243 If the checksum didn't match, the service would instead reject the upload and |
| 244 gsutil would print a message like: |
| 245 |
| 246 BadRequestException: 400 Provided MD5 hash "VgyllJgiiaRAbyUUIqDMmw==" |
| 247 doesn't match calculated MD5 hash "7gyllJgiiaRAbyUUIqDMmw==". |
| 248 |
| 249 Even if you don't do this gsutil will delete the object if the computed |
| 250 checksum mismatches, but specifying the Content-MD5 header has three |
| 251 advantages: |
| 252 |
| 253 1. It prevents the corrupted object from becoming visible at all, whereas |
| 254 otherwise it would be visible for 1-3 seconds before gsutil deletes it. |
| 255 |
| 256 2. It will definitively prevent the corrupted object from being left in |
| 257 the cloud, whereas the gsutil approach of deleting after the upload |
| 258 completes could fail if (for example) the gsutil process gets ^C'd |
| 259 between upload and deletion request. |
| 260 |
| 261 3. It supports a customer-to-service integrity check handoff. For example, |
| 262 if you have a content production pipeline that generates data to be |
| 263 uploaded to the cloud along with checksums of that data, specifying the |
| 264 MD5 computed by your content pipeline when you run gsutil cp will ensure |
| 265 that the checksums match all the way through the process (e.g., detecting |
| 266 if data gets corrupted on your local disk between the time it was written |
| 267 by your content pipeline and the time it was uploaded to GCS). |
| 268 |
| 269 Note: The Content-MD5 header is ignored for composite objects, because such |
| 270 objects only have a CRC32C checksum. |
| 271 """ |
| 272 |
| 273 _RETRY_HANDLING_TEXT = """ |
| 274 <B>RETRY HANDLING</B> |
| 275 The cp command will retry when failures occur, but if enough failures happen |
| 276 during a particular copy or delete operation the command will skip that object |
| 277 and move on. At the end of the copy run if any failures were not successfully |
| 278 retried, the cp command will report the count of failures, and exit with |
| 279 non-zero status. |
| 280 |
| 281 Note that there are cases where retrying will never succeed, such as if you |
| 282 don't have write permission to the destination bucket or if the destination |
| 283 path for some objects is longer than the maximum allowed length. |
| 284 |
| 285 For more details about gsutil's retry handling, please see |
| 286 "gsutil help retries". |
| 287 """ |
| 288 |
| 289 _RESUMABLE_TRANSFERS_TEXT = """ |
| 290 <B>RESUMABLE TRANSFERS</B> |
| 291 gsutil automatically uses the Google Cloud Storage resumable upload feature |
| 292 whenever you use the cp command to upload an object that is larger than 2 |
| 293 MiB. You do not need to specify any special command line options to make this |
| 294 happen. If your upload is interrupted you can restart the upload by running |
| 295 the same cp command that you ran to start the upload. Until the upload |
| 296 has completed successfully, it will not be visible at the destination object |
| 297 and will not replace any existing object the upload is intended to overwrite. |
| 298 (However, see the section on PARALLEL COMPOSITE UPLOADS, which may leave |
| 299 temporary component objects in place during the upload process.) |
| 300 |
| 301 Similarly, gsutil automatically performs resumable downloads (using HTTP |
| 302 standard Range GET operations) whenever you use the cp command, unless the |
| 303 destination is a stream or null. In this case the partially downloaded file |
| 304 will be visible as soon as it starts being written. Thus, before you attempt |
| 305 to use any files downloaded by gsutil you should make sure the download |
| 306 completed successfully, by checking the exit status from the gsutil command. |
| 307 This can be done in a bash script, for example, by doing: |
| 308 |
| 309 gsutil cp gs://your-bucket/your-object ./local-file |
| 310 if [ "$?" -ne "0" ] ; then |
| 311 << Code that handles failures >> |
| 312 fi |
| 313 |
| 314 Resumable uploads and downloads store some state information in a file |
| 315 in ~/.gsutil named by the destination object or file. If you attempt to |
| 316 resume a transfer from a machine with a different directory, the transfer |
| 317 will start over from scratch. |
| 318 |
| 319 See also "gsutil help prod" for details on using resumable transfers |
| 320 in production. |
| 321 """ |
| 322 |
| 323 _STREAMING_TRANSFERS_TEXT = """ |
| 324 <B>STREAMING TRANSFERS</B> |
| 325 Use '-' in place of src_url or dst_url to perform a streaming |
| 326 transfer. For example: |
| 327 |
| 328 long_running_computation | gsutil cp - gs://my_bucket/obj |
| 329 |
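| Similarly, using '-' as the source URL streams a download to stdout, so you can |
| pipe an object into another program (reusing the some_program placeholder from |
| above): |
| |
| gsutil cp gs://my_bucket/obj - | some_program |
| |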
| 330 Streaming uploads using the JSON API (see "gsutil help apis") are buffered in |
| 331 memory and can retry in the event of network flakiness or service errors. |
| 332 |
| 333 Streaming transfers (other than uploads using the JSON API) do not support |
| 334 resumable uploads/downloads. If you have a large amount of data to upload |
| 335 (say, more than 100 MiB) it is recommended to write the data to a local file |
| 336 and then copy that file to the cloud rather than streaming it (and similarly |
| 337 for large downloads). |
| 338 |
| 339 WARNING: When performing streaming transfers gsutil does not compute a |
| 340 checksum of the uploaded or downloaded data. Therefore, we recommend that |
| 341 users either perform their own validation of the data or use non-streaming |
| 342 transfers (which perform integrity checking automatically). |
| 343 """ |
| 344 |
| 345 _PARALLEL_COMPOSITE_UPLOADS_TEXT = """ |
| 346 <B>PARALLEL COMPOSITE UPLOADS</B> |
| 347 gsutil can automatically use |
| 348 `object composition <https://developers.google.com/storage/docs/composite-objects>`_ |
| 349 to perform uploads in parallel for large, local files being uploaded to Google |
| 350 Cloud Storage. This means that, if enabled (see next paragraph), a large file |
| 351 will be split into component pieces that will be uploaded in parallel. Those |
| 352 components will then be composed in the cloud, and the temporary components in |
| 353 the cloud will be deleted after successful composition. No additional local |
| 354 disk space is required for this operation. |
| 355 |
| 356 Using parallel composite uploads presents a tradeoff between upload |
| 357 performance and download configuration: If you enable parallel composite |
| 358 uploads your uploads will run faster, but someone will need to install a |
| 359 compiled crcmod (see "gsutil help crcmod") on every machine where objects are |
| 360 downloaded by gsutil or other Python applications. For some distributions this |
| 361 is easy (e.g., it comes pre-installed on MacOS), but in some cases users have |
| 362 found it difficult. Because of this at present parallel composite uploads are |
| 363 disabled by default. Google is actively working with a number of the Linux |
| 364 distributions to get crcmod included with the stock distribution. Once that is |
| 365 done we will re-enable parallel composite uploads by default in gsutil. |
| 366 |
| 367 To try parallel composite uploads you can run the command: |
| 368 |
| 369 gsutil -o GSUtil:parallel_composite_upload_threshold=150M cp bigfile gs://your-bucket |
| 370 |
| 371 where bigfile is larger than 150 MiB. When you do this notice that the upload |
| 372 progress indicator continuously updates for several different uploads at once |
| 373 (corresponding to each of the sections of the file being uploaded in |
| 374 parallel), until the parallel upload completes. If you then want to enable |
| 375 parallel composite uploads for all of your future uploads (notwithstanding the |
| 376 caveats mentioned earlier), you can uncomment and set the |
| 377 "parallel_composite_upload_threshold" config value in your .boto configuration |
| 378 file to this value. |
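| |
| For example, the corresponding .boto entry would look something like the |
| following (150M is just the threshold used above; choose a value that suits |
| your workload): |
| |
| [GSUtil] |
| parallel_composite_upload_threshold = 150M |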
| 379 |
| 380 Note that the crcmod problem only impacts downloads via Python applications |
| 381 (such as gsutil). If any users who need to download the data using gsutil or |
| 382 other Python applications can install crcmod, it makes sense to enable |
| 383 parallel composite uploads (see above). For example, if you use gsutil to |
| 384 upload video assets and those assets will only ever be served via a Java |
| 385 application (there are efficient crc32c implementations available in Java), it |
| 386 would make sense to enable parallel composite uploads on your machine. |
| 387 |
| 388 If a parallel composite upload fails prior to composition, re-running the |
| 389 gsutil command will take advantage of resumable uploads for those components |
| 390 that failed, and the component objects will be deleted after the first |
| 391 successful attempt. Any temporary objects that were uploaded successfully |
| 392 before gsutil failed will still exist until the upload is completed |
| 393 successfully. The temporary objects will be named in the following fashion: |
| 394 |
| 395 <random ID>%s<hash> |
| 396 |
| 397 where <random ID> is some numerical value, and <hash> is an MD5 hash (not |
| 398 related to the hash of the contents of the file or object). |
| 399 |
| 400 To avoid leaving temporary objects around, you should make sure to check the |
| 401 exit status from the gsutil command. This can be done in a bash script, for |
| 402 example, by doing: |
| 403 |
| 404 gsutil cp ./local-file gs://your-bucket/your-object |
| 405 if [ "$?" -ne "0" ] ; then |
| 406 << Code that handles failures >> |
| 407 fi |
| 408 |
| 409 Or, for copying a directory, use this instead: |
| 410 |
| 411 gsutil cp -c -L cp.log -r ./dir gs://bucket |
| 412 if [ "$?" -ne "0" ] ; then |
| 413 << Code that handles failures >> |
| 414 fi |
| 415 |
| 416 One important caveat is that files uploaded in this fashion are still subject |
| 417 to the maximum number of components limit. For example, if you upload a large |
| 418 file that gets split into %d components, and try to compose it with another |
| 419 object with %d components, the operation will fail because it exceeds the %d |
| 420 component limit. If you wish to compose an object later and the component |
| 421 limit is a concern, it is recommended that you disable parallel composite |
| 422 uploads for that transfer. |
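| |
| For example, you could disable the feature for just that one transfer by |
| overriding the threshold on the command line: |
| |
| gsutil -o GSUtil:parallel_composite_upload_threshold=0 cp bigfile gs://your-bucket |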
| 423 |
| 424 Also note that an object uploaded using this feature will have a CRC32C hash, |
| 425 but it will not have an MD5 hash (and because of that, requires users who |
| 426 download the object to have crcmod installed, as noted earlier). For details |
| 427 see 'gsutil help crc32c'. |
| 428 |
| 429 Note that this feature can be completely disabled by setting the |
| 430 "parallel_composite_upload_threshold" variable in the .boto config file to 0. |
| 431 """ % (PARALLEL_UPLOAD_TEMP_NAMESPACE, 10, MAX_COMPONENT_COUNT - 9, |
| 432 MAX_COMPONENT_COUNT) |
| 433 |
| 434 |
| 435 _CHANGING_TEMP_DIRECTORIES_TEXT = """ |
| 436 <B>CHANGING TEMP DIRECTORIES</B> |
| 437 gsutil writes data to a temporary directory in several cases: |
| 438 |
| 439 - when compressing data to be uploaded (see the -z option) |
| 440 - when decompressing data being downloaded (when the data has |
| 441 Content-Encoding:gzip, e.g., as happens when uploaded using gsutil cp -z) |
| 442 - when running integration tests (using the gsutil test command) |
| 443 |
| 444 In these cases it's possible the temp file location on your system that |
| 445 gsutil selects by default may not have enough space. If you find that |
| 446 gsutil runs out of space during one of these operations (e.g., raising |
| 447 "CommandException: Inadequate temp space available to compress <your file>" |
| 448 during a gsutil cp -z operation), you can change where it writes these |
| 449 temp files by setting the TMPDIR environment variable. On Linux and MacOS |
| 450 you can do this either by running gsutil this way: |
| 451 |
| 452 TMPDIR=/some/directory gsutil cp ... |
| 453 |
| 454 or by adding this line to your ~/.bashrc file and then restarting the shell |
| 455 before running gsutil: |
| 456 |
| 457 export TMPDIR=/some/directory |
| 458 |
| 459 On Windows 7 you can change the TMPDIR environment variable from Start -> |
| 460 Computer -> System -> Advanced System Settings -> Environment Variables. |
| 461 You need to reboot after making this change for it to take effect. (Rebooting |
| 462 is not necessary after running the export command on Linux and MacOS.) |
| 463 """ |
| 464 |
| 465 _OPTIONS_TEXT = """ |
| 466 <B>OPTIONS</B> |
| 467 -a canned_acl Sets the named canned_acl when uploaded objects are created. See |
| 468 'gsutil help acls' for further details. |
| 469 |
| 470 -c If an error occurs, continue to attempt to copy the remaining |
| 471 files. If any copies were unsuccessful, gsutil's exit status |
| 472 will be non-zero even if this flag is set. This option is |
| 473 implicitly set when running "gsutil -m cp...". Note: -c only |
| 474 applies to the actual copying operation. If an error occurs |
| 475 while iterating over the files in the local directory (e.g., |
| 476 invalid Unicode file name) gsutil will print an error message |
| 477 and abort. |
| 478 |
| 479 -D Copy in "daisy chain" mode, i.e., copying between two buckets |
| 480 by hooking a download to an upload, via the machine where |
| 481 gsutil is run. By default, data are copied between two buckets |
| 482 "in the cloud", i.e., without needing to copy via the machine |
| 483 where gsutil runs. |
| 484 |
| 485 By default, a "copy in the cloud" when the source is a |
| 486 composite object will retain the composite nature of the |
| 487 object. However, Daisy chain mode can be used to change a |
| 488 composite object into a non-composite object. For example: |
| 489 |
| 490 gsutil cp -D -p gs://bucket/obj gs://bucket/obj_tmp |
| 491 gsutil mv -p gs://bucket/obj_tmp gs://bucket/obj |
| 492 |
| 493 Note: Daisy chain mode is automatically used when copying |
| 494 between providers (e.g., to copy data from Google Cloud Storage |
| 495 to another provider). |
| 496 |
| 497 -e Exclude symlinks. When specified, symbolic links will not be |
| 498 copied. |
| 499 |
| 500 -I Causes gsutil to read the list of files or objects to copy from |
| 501 stdin. This allows you to run a program that generates the list |
| 502 of files to upload/download. |
| 503 |
| 504 -L <file> Outputs a manifest log file with detailed information about |
| 505 each item that was copied. This manifest contains the following |
| 506 information for each item: |
| 507 |
| 508 - Source path. |
| 509 - Destination path. |
| 510 - Source size. |
| 511 - Bytes transferred. |
| 512 - MD5 hash. |
| 513 - UTC date and time transfer was started in ISO 8601 format. |
| 514 - UTC date and time transfer was completed in ISO 8601 format. |
| 515 - Upload id, if a resumable upload was performed. |
| 516 - Final result of the attempted transfer, success or failure. |
| 517 - Failure details, if any. |
| 518 |
| 519 If the log file already exists, gsutil will use the file as an |
| 520 input to the copy process, and will also append log items to |
| 521 the existing file. Files/objects that are marked in the |
| 522 existing log file as having been successfully copied (or |
| 523 skipped) will be ignored. Files/objects without entries will be |
| 524 copied and ones previously marked as unsuccessful will be |
| 525 retried. This can be used in conjunction with the -c option to |
| 526 build a script that copies a large number of objects reliably, |
| 527 using a bash script like the following: |
| 528 |
| 529 until gsutil cp -c -L cp.log -r ./dir gs://bucket; do |
| 530 sleep 1 |
| 531 done |
| 532 |
| 533 The -c option will cause copying to continue after failures |
| 534 occur, and the -L option will allow gsutil to pick up where it |
| 535 left off without duplicating work. The loop will continue |
| 536 running as long as gsutil exits with a non-zero status (such a |
| 537 status indicates there was at least one failure during the |
| 538 gsutil run). |
| 539 |
| 540 Note: If you're trying to synchronize the contents of a |
| 541 directory and a bucket (or two buckets), see |
| 542 'gsutil help rsync'. |
| 543 |
| 544 -n No-clobber. When specified, existing files or objects at the |
| 545 destination will not be overwritten. Any items that are skipped |
| 546 by this option will be reported as being skipped. This option |
| 547 will perform an additional GET request to check if an item |
| 548 exists before attempting to upload the data. This will save |
| 549 retransmitting data, but the additional HTTP requests may make |
| 550 small object transfers slower and more expensive. |
| 551 |
| 552 -p Causes ACLs to be preserved when copying in the cloud. Note |
| 553 that this option has performance and cost implications when |
| 554 using the XML API, as it requires separate HTTP calls for |
| 555 interacting with ACLs. The performance issue can be mitigated |
| 556 to some degree by using gsutil -m cp to cause parallel copying. |
| 557 Also, this option only works if you have OWNER access to all of |
| 558 the objects that are copied. |
| 559 |
| 560 You can avoid the additional performance and cost of using |
| 561 cp -p if you want all objects in the destination bucket to end |
| 562 up with the same ACL by setting a default object ACL on that |
| 563 bucket instead of using cp -p. See "help gsutil defacl". |
| 564 |
| 565 Note that it's not valid to specify both the -a and -p options |
| 566 together. |
| 567 |
| 568 -R, -r Causes directories, buckets, and bucket subdirectories to be |
| 569 copied recursively. If you neglect to use this option for |
| 570 an upload, gsutil will copy any files it finds and skip any |
| 571 directories. Similarly, neglecting to specify -r for a download |
| 572 will cause gsutil to copy any objects at the current bucket |
| 573 directory level, and skip any subdirectories. |
| 574 |
| 575 -U Skip objects with unsupported object types instead of failing. |
| 576 Unsupported object types are Amazon S3 Glacier objects. |
| 577 |
| 578 -v Requests that the version-specific URL for each uploaded object |
| 579 be printed. Given this URL you can make future upload requests |
| 580 that are safe in the face of concurrent updates, because Google |
| 581 Cloud Storage will refuse to perform the update if the current |
| 582 object version doesn't match the version-specific URL. See |
| 583 'gsutil help versions' for more details. |
| 584 |
| 585 -z <ext,...> Applies gzip content-encoding to file uploads with the given |
| 586 extensions. This is useful when uploading files with |
| 587 compressible content (such as .js, .css, or .html files) |
| 588 because it saves network bandwidth and space in Google Cloud |
| 589 Storage, which in turn reduces storage costs. |
| 590 |
| 591 When you specify the -z option, the data from your files is |
| 592 compressed before it is uploaded, but your actual files are |
| 593 left uncompressed on the local disk. The uploaded objects |
| 594 retain the Content-Type and name of the original files but are |
| 595 given a Content-Encoding header with the value "gzip" to |
| 596 indicate that the object data stored are compressed on the |
| 597 Google Cloud Storage servers. |
| 598 |
| 599 For example, the following command: |
| 600 |
| 601 gsutil cp -z html -a public-read cattypes.html gs://mycats |
| 602 |
| 603 will do all of the following: |
| 604 |
| 605 - Upload as the object gs://mycats/cattypes.html (cp command) |
| 606 - Set the Content-Type to text/html (based on file extension) |
| 607 - Compress the data in the file cattypes.html (-z option) |
| 608 - Set the Content-Encoding to gzip (-z option) |
| 609 - Set the ACL to public-read (-a option) |
| 610 - If a user tries to view cattypes.html in a browser, the |
| 611 browser will know to uncompress the data based on the |
| 612 Content-Encoding header, and to render it as HTML based on |
| 613 the Content-Type header. |
| 614 |
| 615 Note that if you download an object with Content-Encoding:gzip |
| 616 gsutil will decompress the content before writing the local |
| 617 file. |
| 618 """ |
| 619 |
| 620 _DETAILED_HELP_TEXT = '\n\n'.join([_SYNOPSIS_TEXT, |
| 621 _DESCRIPTION_TEXT, |
| 622 _NAME_CONSTRUCTION_TEXT, |
| 623 _SUBDIRECTORIES_TEXT, |
| 624 _COPY_IN_CLOUD_TEXT, |
| 625 _CHECKSUM_VALIDATION_TEXT, |
| 626 _RETRY_HANDLING_TEXT, |
| 627 _RESUMABLE_TRANSFERS_TEXT, |
| 628 _STREAMING_TRANSFERS_TEXT, |
| 629 _PARALLEL_COMPOSITE_UPLOADS_TEXT, |
| 630 _CHANGING_TEMP_DIRECTORIES_TEXT, |
| 631 _OPTIONS_TEXT]) |
| 632 |
| 633 |
| 634 CP_SUB_ARGS = 'a:cDeIL:MNnprRtUvz:' |
| 635 |
| 636 |
| 637 def _CopyFuncWrapper(cls, args, thread_state=None): |
| 638 cls.CopyFunc(args, thread_state=thread_state) |
| 639 |
| 640 |
| 641 def _CopyExceptionHandler(cls, e): |
| 642 """Simple exception handler to allow post-completion status.""" |
| 643 cls.logger.error(str(e)) |
| 644 cls.op_failure_count += 1 |
| 645 cls.logger.debug('\n\nEncountered exception while copying:\n%s\n', |
| 646 traceback.format_exc()) |
| 647 |
| 648 |
| 649 def _RmExceptionHandler(cls, e): |
| 650 """Simple exception handler to allow post-completion status.""" |
| 651 cls.logger.error(str(e)) |
| 652 |
| 653 |
| 654 class CpCommand(Command): |
| 655 """Implementation of gsutil cp command. |
| 656 |
| 657 Note that CpCommand is run for both gsutil cp and gsutil mv. The latter |
| 658 happens by MvCommand calling CpCommand and passing the hidden (undocumented) |
| 659 -M option. This allows the copy and remove needed for each mv to run |
| 660 together (rather than first running all the cp's and then all the rm's, as |
| 661 we originally had implemented), which in turn avoids the following problem |
| 662 with removing the wrong objects: starting with a bucket containing only |
| 663 the object gs://bucket/obj, say the user does: |
| 664 gsutil mv gs://bucket/* gs://bucket/d.txt |
| 665 If we ran all the cp's and then all the rm's and we didn't expand the wildcard |
| 666 first, the cp command would first copy gs://bucket/obj to gs://bucket/d.txt, |
| 667 and the rm command would then remove that object. In the implementation |
| 668 prior to gsutil release 3.12 we avoided this by building a list of objects |
| 669 to process and then running the copies and then the removes; but building |
| 670 the list up front limits scalability (compared with the current approach |
| 671 of processing the bucket listing iterator on the fly). |
| 672 """ |
| 673 |
| 674 # Command specification. See base class for documentation. |
| 675 command_spec = Command.CreateCommandSpec( |
| 676 'cp', |
| 677 command_name_aliases=['copy'], |
| 678 usage_synopsis=_SYNOPSIS, |
| 679 min_args=1, |
| 680 max_args=NO_MAX, |
| 681 # -t is deprecated but leave intact for now to avoid breakage. |
| 682 supported_sub_args=CP_SUB_ARGS, |
| 683 file_url_ok=True, |
| 684 provider_url_ok=False, |
| 685 urls_start_arg=0, |
| 686 gs_api_support=[ApiSelector.XML, ApiSelector.JSON], |
| 687 gs_default_api=ApiSelector.JSON, |
| 688 supported_private_args=['testcallbackfile='], |
| 689 argparse_arguments=[ |
| 690 CommandArgument.MakeZeroOrMoreCloudOrFileURLsArgument() |
| 691 ] |
| 692 ) |
| 693 # Help specification. See help_provider.py for documentation. |
| 694 help_spec = Command.HelpSpec( |
| 695 help_name='cp', |
| 696 help_name_aliases=['copy'], |
| 697 help_type='command_help', |
| 698 help_one_line_summary='Copy files and objects', |
| 699 help_text=_DETAILED_HELP_TEXT, |
| 700 subcommand_help_text={}, |
| 701 ) |
| 702 |
| 703 # pylint: disable=too-many-statements |
| 704 def CopyFunc(self, name_expansion_result, thread_state=None): |
| 705 """Worker function for performing the actual copy (and rm, for mv).""" |
| 706 gsutil_api = GetCloudApiInstance(self, thread_state=thread_state) |
| 707 |
| 708 copy_helper_opts = copy_helper.GetCopyHelperOpts() |
| 709 if copy_helper_opts.perform_mv: |
| 710 cmd_name = 'mv' |
| 711 else: |
| 712 cmd_name = self.command_name |
| 713 src_url = name_expansion_result.source_storage_url |
| 714 exp_src_url = name_expansion_result.expanded_storage_url |
| 715 src_url_names_container = name_expansion_result.names_container |
| 716 have_multiple_srcs = name_expansion_result.is_multi_source_request |
| 717 |
| 718 if src_url.IsCloudUrl() and src_url.IsProvider(): |
| 719 raise CommandException( |
| 720 'The %s command does not allow provider-only source URLs (%s)' % |
| 721 (cmd_name, src_url)) |
| 722 if have_multiple_srcs: |
| 723 copy_helper.InsistDstUrlNamesContainer( |
| 724 self.exp_dst_url, self.have_existing_dst_container, cmd_name) |
| 725 |
| 726 # Various GUI tools (like the GCS web console) create placeholder objects |
| 727 # ending with '/' when the user creates an empty directory. Normally these |
| 728 # tools should delete those placeholders once objects have been written |
| 729 # "under" the directory, but sometimes the placeholders are left around. We |
| 730 # need to filter them out here, otherwise if the user tries to rsync from |
| 731 # GCS to a local directory it will result in a directory/file conflict |
| 732 # (e.g., trying to download an object called "mydata/" where the local |
| 733 # directory "mydata" exists). |
| 734 if IsCloudSubdirPlaceholder(exp_src_url): |
| 735 self.logger.info('Skipping cloud sub-directory placeholder object (%s) ' |
| 736 'because such objects aren\'t needed in (and would ' |
| 737 'interfere with) directories in the local file system', |
| 738 exp_src_url) |
| 739 return |
| 740 |
| 741 if copy_helper_opts.use_manifest and self.manifest.WasSuccessful( |
| 742 exp_src_url.url_string): |
| 743 return |
| 744 |
| 745 if copy_helper_opts.perform_mv: |
| 746 if name_expansion_result.names_container: |
| 747 # Use recursion_requested when performing name expansion for the |
| 748 # directory mv case so we can determine if any of the source URLs are |
| 749 # directories (and then use cp -r and rm -r to perform the move, to |
| 750 # match the behavior of Linux mv (which when moving a directory moves |
| 751 all the contained files)). |
| 752 self.recursion_requested = True |
| 753 # Disallow wildcard src URLs when moving directories, as supporting it |
| 754 # would make the name transformation too complex and would also be |
| 755 # dangerous (e.g., someone could accidentally move many objects to the |
| 756 # wrong name, or accidentally overwrite many objects). |
| 757 if ContainsWildcard(src_url.url_string): |
| 758 raise CommandException('The mv command disallows naming source ' |
| 759 'directories using wildcards') |
| 760 |
| 761 if (self.exp_dst_url.IsFileUrl() |
| 762 and not os.path.exists(self.exp_dst_url.object_name) |
| 763 and have_multiple_srcs): |
| 764 os.makedirs(self.exp_dst_url.object_name) |
| 765 |
| 766 dst_url = copy_helper.ConstructDstUrl( |
| 767 src_url, exp_src_url, src_url_names_container, have_multiple_srcs, |
| 768 self.exp_dst_url, self.have_existing_dst_container, |
| 769 self.recursion_requested) |
| 770 dst_url = copy_helper.FixWindowsNaming(src_url, dst_url) |
| 771 |
| 772 copy_helper.CheckForDirFileConflict(exp_src_url, dst_url) |
| 773 if copy_helper.SrcDstSame(exp_src_url, dst_url): |
| 774 raise CommandException('%s: "%s" and "%s" are the same file - ' |
| 775 'abort.' % (cmd_name, exp_src_url, dst_url)) |
| 776 |
| 777 if dst_url.IsCloudUrl() and dst_url.HasGeneration(): |
| 778 raise CommandException('%s: a version-specific URL\n(%s)\ncannot be ' |
| 779 'the destination for gsutil cp - abort.' |
| 780 % (cmd_name, dst_url)) |
| 781 |
| 782 elapsed_time = bytes_transferred = 0 |
| 783 try: |
| 784 if copy_helper_opts.use_manifest: |
| 785 self.manifest.Initialize( |
| 786 exp_src_url.url_string, dst_url.url_string) |
| 787 (elapsed_time, bytes_transferred, result_url, md5) = ( |
| 788 copy_helper.PerformCopy( |
| 789 self.logger, exp_src_url, dst_url, gsutil_api, |
| 790 self, _CopyExceptionHandler, allow_splitting=True, |
| 791 headers=self.headers, manifest=self.manifest, |
| 792 gzip_exts=self.gzip_exts, test_method=self.test_method)) |
| 793 if copy_helper_opts.use_manifest: |
| 794 if md5: |
| 795 self.manifest.Set(exp_src_url.url_string, 'md5', md5) |
| 796 self.manifest.SetResult( |
| 797 exp_src_url.url_string, bytes_transferred, 'OK') |
| 798 if copy_helper_opts.print_ver: |
| 799 # Some cases don't return a version-specific URL (e.g., if destination |
| 800 # is a file). |
| 801 self.logger.info('Created: %s', result_url) |
| 802 except ItemExistsError: |
| 803 message = 'Skipping existing item: %s' % dst_url |
| 804 self.logger.info(message) |
| 805 if copy_helper_opts.use_manifest: |
| 806 self.manifest.SetResult(exp_src_url.url_string, 0, 'skip', message) |
| 807 except SkipUnsupportedObjectError, e: |
| 808 message = ('Skipping item %s with unsupported object type %s' % |
| 809 (exp_src_url.url_string, e.unsupported_type)) |
| 810 self.logger.info(message) |
| 811 if copy_helper_opts.use_manifest: |
| 812 self.manifest.SetResult(exp_src_url.url_string, 0, 'skip', message) |
| 813 except copy_helper.FileConcurrencySkipError, e: |
| 814 self.logger.warn('Skipping copy of source URL %s because destination URL ' |
| 815 '%s is already being copied by another gsutil process ' |
| 816 'or thread (did you specify the same source URL twice?) ' |
| 817 % (src_url, dst_url)) |
| 818 except Exception, e: |
| 819 if (copy_helper_opts.no_clobber and |
| 820 copy_helper.IsNoClobberServerException(e)): |
| 821 message = 'Rejected (noclobber): %s' % dst_url |
| 822 self.logger.info(message) |
| 823 if copy_helper_opts.use_manifest: |
| 824 self.manifest.SetResult( |
| 825 exp_src_url.url_string, 0, 'skip', message) |
| 826 elif self.continue_on_error: |
| 827 message = 'Error copying %s: %s' % (src_url, str(e)) |
| 828 self.op_failure_count += 1 |
| 829 self.logger.error(message) |
| 830 if copy_helper_opts.use_manifest: |
| 831 self.manifest.SetResult( |
| 832 exp_src_url.url_string, 0, 'error', |
| 833 RemoveCRLFFromString(message)) |
| 834 else: |
| 835 if copy_helper_opts.use_manifest: |
| 836 self.manifest.SetResult( |
| 837 exp_src_url.url_string, 0, 'error', str(e)) |
| 838 raise |
| 839 else: |
| 840 if copy_helper_opts.perform_mv: |
| 841 self.logger.info('Removing %s...', exp_src_url) |
| 842 if exp_src_url.IsCloudUrl(): |
| 843 gsutil_api.DeleteObject(exp_src_url.bucket_name, |
| 844 exp_src_url.object_name, |
| 845 generation=exp_src_url.generation, |
| 846 provider=exp_src_url.scheme) |
| 847 else: |
| 848 os.unlink(exp_src_url.object_name) |
| 849 |
| 850 with self.stats_lock: |
| 851 self.total_elapsed_time += elapsed_time |
| 852 self.total_bytes_transferred += bytes_transferred |
| 853 |
| 854 # Command entry point. |
| 855 def RunCommand(self): |
| 856 copy_helper_opts = self._ParseOpts() |
| 857 |
| 858 self.total_elapsed_time = self.total_bytes_transferred = 0 |
| 859 if self.args[-1] == '-' or self.args[-1] == 'file://-': |
| 860 return CatHelper(self).CatUrlStrings(self.args[:-1]) |
| 861 |
| 862 if copy_helper_opts.read_args_from_stdin: |
| 863 if len(self.args) != 1: |
| 864 raise CommandException('Source URLs cannot be specified with -I option') |
| 865 url_strs = StdinIterator() |
| 866 else: |
| 867 if len(self.args) < 2: |
| 868 raise CommandException('Wrong number of arguments for "cp" command.') |
| 869 url_strs = self.args[:-1] |
| 870 |
| 871 (self.exp_dst_url, self.have_existing_dst_container) = ( |
| 872 copy_helper.ExpandUrlToSingleBlr(self.args[-1], self.gsutil_api, |
| 873 self.debug, self.project_id)) |
| 874 |
| 875 # If the destination bucket has versioning enabled iterate with |
| 876 # all_versions=True. That way we'll copy all versions if the source bucket |
| 877 # is versioned; and by leaving all_versions=False if the destination bucket |
| 878 # has versioning disabled we will avoid copying old versions all to the same |
| 879 # un-versioned destination object. |
| 880 all_versions = False |
| 881 try: |
| 882 bucket = self._GetBucketWithVersioningConfig(self.exp_dst_url) |
| 883 if bucket and bucket.versioning and bucket.versioning.enabled: |
| 884 all_versions = True |
| 885 except AccessDeniedException: |
| 886 # This happens (in the XML API only) if the user doesn't have OWNER access |
| 887 # on the bucket (needed to check if versioning is enabled). In this case |
| 888 # fall back to copying all versions (which can be inefficient for the |
| 889 # reason noted in the comment above). We don't try to warn the user |
| 890 # because that would result in false positive warnings (since we can't |
| 891 # check if versioning is enabled on the destination bucket). |
| 892 # |
| 893 # For JSON, we will silently not return versioning if we don't have |
| 894 # access. |
| 895 all_versions = True |
| 896 |
| 897 name_expansion_iterator = NameExpansionIterator( |
| 898 self.command_name, self.debug, |
| 899 self.logger, self.gsutil_api, url_strs, |
| 900 self.recursion_requested or copy_helper_opts.perform_mv, |
| 901 project_id=self.project_id, all_versions=all_versions, |
| 902 continue_on_error=self.continue_on_error or self.parallel_operations) |
| 903 |
| 904 # Use a lock to ensure accurate statistics in the face of |
| 905 # multi-threading/multi-processing. |
| 906 self.stats_lock = CreateLock() |
| 907 |
| 908 # Tracks if any copies failed. |
| 909 self.op_failure_count = 0 |
| 910 |
| 911 # Start the clock. |
| 912 start_time = time.time() |
| 913 |
| 914 # Tuple of attributes to share/manage across multiple processes in |
| 915 # parallel (-m) mode. |
| 916 shared_attrs = ('op_failure_count', 'total_bytes_transferred') |
| 917 |
| 918 # Perform copy requests in parallel (-m) mode, if requested, using |
| 919 # configured number of parallel processes and threads. Otherwise, |
| 920 # perform requests with sequential function calls in current process. |
| 921 self.Apply(_CopyFuncWrapper, name_expansion_iterator, |
| 922 _CopyExceptionHandler, shared_attrs, |
| 923 fail_on_error=(not self.continue_on_error)) |
| 924 self.logger.debug( |
| 925 'total_bytes_transferred: %d', self.total_bytes_transferred) |
| 926 |
| 927 end_time = time.time() |
| 928 self.total_elapsed_time = end_time - start_time |
| 929 |
| 930 # Sometimes, particularly when running unit tests, the total elapsed time |
| 931 # is really small. On Windows, the timer resolution is too small and |
| 932 # causes total_elapsed_time to be zero. |
| 933 try: |
| 934 float(self.total_bytes_transferred) / float(self.total_elapsed_time) |
| 935 except ZeroDivisionError: |
| 936 self.total_elapsed_time = 0.01 |
| 937 |
| 938 self.total_bytes_per_second = (float(self.total_bytes_transferred) / |
| 939 float(self.total_elapsed_time)) |
| 940 |
| 941 if self.debug == 3: |
| 942 # Note that this only counts the actual GET and PUT bytes for the copy |
| 943 # - not any transfers for doing wildcard expansion, the initial |
| 944 # HEAD/GET request performed to get the object metadata, etc. |
| 945 if self.total_bytes_transferred != 0: |
| 946 self.logger.info( |
| 947 'Total bytes copied=%d, total elapsed time=%5.3f secs (%sps)', |
| 948 self.total_bytes_transferred, self.total_elapsed_time, |
| 949 MakeHumanReadable(self.total_bytes_per_second)) |
| 950 if self.op_failure_count: |
| 951 plural_str = 's' if self.op_failure_count > 1 else '' |
| 952 raise CommandException('%d file%s/object%s could not be transferred.' % ( |
| 953 self.op_failure_count, plural_str, plural_str)) |
| 954 |
| 955 return 0 |
| 956 |
| 957 def _ParseOpts(self): |
| 958 perform_mv = False |
| 959 # exclude_symlinks is handled by Command parent class, so save in Command |
| 960 # state rather than CopyHelperOpts. |
| 961 self.exclude_symlinks = False |
| 962 no_clobber = False |
| 963 # continue_on_error is handled by Command parent class, so save in Command |
| 964 # state rather than CopyHelperOpts. |
| 965 self.continue_on_error = False |
| 966 daisy_chain = False |
| 967 read_args_from_stdin = False |
| 968 print_ver = False |
| 969 use_manifest = False |
| 970 preserve_acl = False |
| 971 canned_acl = None |
| 972 # canned_acl is handled by a helper function in parent |
| 973 # Command class, so save in Command state rather than CopyHelperOpts. |
| 974 self.canned = None |
| 975 |
| 976 self.skip_unsupported_objects = False |
| 977 |
| 978 # Files matching these extensions should be gzipped before uploading. |
| 979 self.gzip_exts = [] |
| 980 |
| 981 test_callback_file = None |
| 982 |
| 983 # self.recursion_requested initialized in command.py (so can be checked |
| 984 # in parent class for all commands). |
| 985 self.manifest = None |
| 986 if self.sub_opts: |
| 987 for o, a in self.sub_opts: |
| 988 if o == '-a': |
| 989 canned_acl = a |
| 990 self.canned = True |
| 991 elif o == '-c': |
| 992 self.continue_on_error = True |
| 993 elif o == '-D': |
| 994 daisy_chain = True |
| 995 elif o == '-e': |
| 996 self.exclude_symlinks = True |
| 997 elif o == '--testcallbackfile': |
| 998 # File path of a pickled class that implements ProgressCallback.call. |
| 999 # Used for testing transfer interruptions and resumes. |
| 1000 test_callback_file = a |
| 1001 elif o == '-I': |
| 1002 read_args_from_stdin = True |
| 1003 elif o == '-L': |
| 1004 use_manifest = True |
| 1005 self.manifest = Manifest(a) |
| 1006 elif o == '-M': |
| 1007 # Note that we signal to the cp command to perform a move (copy |
| 1008 # followed by remove) and use directory-move naming rules by passing |
| 1009 # the undocumented (for internal use) -M option when running the cp |
| 1010 # command from mv.py. |
| 1011 perform_mv = True |
| 1012 elif o == '-n': |
| 1013 no_clobber = True |
| 1014 elif o == '-p': |
| 1015 preserve_acl = True |
| 1016 elif o == '-r' or o == '-R': |
| 1017 self.recursion_requested = True |
| 1018 elif o == '-U': |
| 1019 self.skip_unsupported_objects = True |
| 1020 elif o == '-v': |
| 1021 print_ver = True |
| 1022 elif o == '-z': |
| 1023 self.gzip_exts = [x.strip() for x in a.split(',')] |
| 1024 if preserve_acl and canned_acl: |
| 1025 raise CommandException( |
| 1026 'Specifying both the -p and -a options together is invalid.') |
| 1027 return CreateCopyHelperOpts( |
| 1028 perform_mv=perform_mv, |
| 1029 no_clobber=no_clobber, |
| 1030 daisy_chain=daisy_chain, |
| 1031 read_args_from_stdin=read_args_from_stdin, |
| 1032 print_ver=print_ver, |
| 1033 use_manifest=use_manifest, |
| 1034 preserve_acl=preserve_acl, |
| 1035 canned_acl=canned_acl, |
| 1036 skip_unsupported_objects=self.skip_unsupported_objects, |
| 1037 test_callback_file=test_callback_file) |
| 1038 |
| 1039 def _GetBucketWithVersioningConfig(self, exp_dst_url): |
| 1040 """Gets versioning config for a bucket and ensures that it exists. |
| 1041 |
| 1042 Args: |
| 1043 exp_dst_url: Wildcard-expanded destination StorageUrl. |
| 1044 |
| 1045 Raises: |
| 1046 AccessDeniedException: if there was a permissions problem accessing the |
| 1047 bucket or its versioning config. |
| 1048 CommandException: if URL refers to a cloud bucket that does not exist. |
| 1049 |
| 1050 Returns: |
| 1051 apitools Bucket with versioning configuration. |
| 1052 """ |
| 1053 bucket = None |
| 1054 if exp_dst_url.IsCloudUrl() and exp_dst_url.IsBucket(): |
| 1055 try: |
| 1056 bucket = self.gsutil_api.GetBucket( |
| 1057 exp_dst_url.bucket_name, provider=exp_dst_url.scheme, |
| 1058 fields=['versioning']) |
| 1059 except AccessDeniedException, e: |
| 1060 raise |
| 1061 except NotFoundException, e: |
| 1062 raise CommandException('Destination bucket %s does not exist.' % |
| 1063 exp_dst_url) |
| 1064 except Exception, e: |
| 1065 raise CommandException('Error retrieving destination bucket %s: %s' % |
| 1066 (exp_dst_url, e.message)) |
| 1067 return bucket |