| OLD | NEW |
| (Empty) |
| 1 # Copyright 2011 Google Inc. All Rights Reserved. | |
| 2 # Copyright 2011, Nexenta Systems Inc. | |
| 3 # | |
| 4 # Licensed under the Apache License, Version 2.0 (the "License"); | |
| 5 # you may not use this file except in compliance with the License. | |
| 6 # You may obtain a copy of the License at | |
| 7 # | |
| 8 # http://www.apache.org/licenses/LICENSE-2.0 | |
| 9 # | |
| 10 # Unless required by applicable law or agreed to in writing, software | |
| 11 # distributed under the License is distributed on an "AS IS" BASIS, | |
| 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 13 # See the License for the specific language governing permissions and | |
| 14 # limitations under the License. | |
| 15 | |
| 16 import boto | |
| 17 import errno | |
| 18 import gzip | |
| 19 import hashlib | |
| 20 import mimetypes | |
| 21 import os | |
| 22 import platform | |
| 23 import re | |
| 24 import subprocess | |
| 25 import stat | |
| 26 import sys | |
| 27 import tempfile | |
| 28 import threading | |
| 29 import time | |
| 30 | |
| 31 from boto import config | |
| 32 from boto.exception import GSResponseError | |
| 33 from boto.exception import ResumableUploadException | |
| 34 from boto.gs.resumable_upload_handler import ResumableUploadHandler | |
| 35 from boto.s3.keyfile import KeyFile | |
| 36 from boto.s3.resumable_download_handler import ResumableDownloadHandler | |
| 37 from boto.storage_uri import BucketStorageUri | |
| 38 from gslib.command import COMMAND_NAME | |
| 39 from gslib.command import COMMAND_NAME_ALIASES | |
| 40 from gslib.command import CONFIG_REQUIRED | |
| 41 from gslib.command import Command | |
| 42 from gslib.command import FILE_URIS_OK | |
| 43 from gslib.command import MAX_ARGS | |
| 44 from gslib.command import MIN_ARGS | |
| 45 from gslib.command import PROVIDER_URIS_OK | |
| 46 from gslib.command import SUPPORTED_SUB_ARGS | |
| 47 from gslib.command import URIS_START_ARG | |
| 48 from gslib.exception import CommandException | |
| 49 from gslib.help_provider import HELP_NAME | |
| 50 from gslib.help_provider import HELP_NAME_ALIASES | |
| 51 from gslib.help_provider import HELP_ONE_LINE_SUMMARY | |
| 52 from gslib.help_provider import HELP_TEXT | |
| 53 from gslib.help_provider import HELP_TYPE | |
| 54 from gslib.help_provider import HelpType | |
| 55 from gslib.name_expansion import NameExpansionIterator | |
| 56 from gslib.util import ExtractErrorDetail | |
| 57 from gslib.util import IS_WINDOWS | |
| 58 from gslib.util import MakeHumanReadable | |
| 59 from gslib.util import NO_MAX | |
| 60 from gslib.util import TWO_MB | |
| 61 from gslib.wildcard_iterator import ContainsWildcard | |
| 62 | |
| 63 _detailed_help_text = (""" | |
| 64 <B>SYNOPSIS</B> | |
| 65 gsutil cp [OPTION]... src_uri dst_uri | |
| 66 - or - | |
| 67 gsutil cp [OPTION]... src_uri... dst_uri | |
| 68 - or - | |
| 69 gsutil cp [OPTION]... -I dst_uri | |
| 70 | |
| 71 | |
| 72 <B>DESCRIPTION</B> | |
| 73 The gsutil cp command allows you to copy data between your local file | |
| 74 system and the cloud, copy data within the cloud, and copy data between | |
| 75 cloud storage providers. For example, to copy all text files from the | |
| 76 local directory to a bucket you could do: | |
| 77 | |
| 78 gsutil cp *.txt gs://my_bucket | |
| 79 | |
| 80 Similarly, you can download text files from a bucket by doing: | |
| 81 | |
| 82 gsutil cp gs://my_bucket/*.txt . | |
| 83 | |
| 84 If you want to copy an entire directory tree you need to use the -R option: | |
| 85 | |
| 86 gsutil cp -R dir gs://my_bucket | |
| 87 | |
| 88 If you have a large number of files to upload you might want to use the | |
| 89 gsutil -m option, to perform a parallel (multi-threaded/multi-processing) | |
| 90 copy: | |
| 91 | |
| 92 gsutil -m cp -R dir gs://my_bucket | |
| 93 | |
| 94 You can pass a list of URIs to copy on STDIN instead of as command line | |
| 95 arguments by using the -I option. This allows you to use gsutil in a | |
| 96 pipeline to copy files and objects as generated by a program, such as: | |
| 97 | |
| 98 some_program | gsutil -m cp -I gs://my_bucket | |
| 99 | |
| 100 The contents of STDIN can name files, cloud URIs, and wildcards of files | |
| 101 and cloud URIs. | |
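| | |
| For example (hypothetical names), the lines fed to gsutil cp -I might be: | |
| | |
| file1.txt | |
| dir/*.png | |
| gs://other_bucket/obj | |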
| 102 | |
| 103 | |
| 104 <B>HOW NAMES ARE CONSTRUCTED</B> | |
| 105 The gsutil cp command strives to name objects in a way consistent with how | |
| 106 Linux cp works, which causes names to be constructed in varying ways depending | |
| 107 on whether you're performing a recursive directory copy or copying | |
| 108 individually named objects; and whether you're copying to an existing or | |
| 109 non-existent directory. | |
| 110 | |
| 111 When performing recursive directory copies, object names are constructed | |
| 112 that mirror the source directory structure starting at the point of | |
| 113 recursive processing. For example, the command: | |
| 114 | |
| 115 gsutil cp -R dir1/dir2 gs://my_bucket | |
| 116 | |
| 117 will create objects named like gs://my_bucket/dir2/a/b/c, assuming | |
| 118 dir1/dir2 contains the file a/b/c. | |
| 119 | |
| 120 In contrast, copying individually named files will result in objects named | |
| 121 by the final path component of the source files. For example, the command: | |
| 122 | |
| 123 gsutil cp dir1/dir2/** gs://my_bucket | |
| 124 | |
| 125 will create objects named like gs://my_bucket/c. | |
| 126 | |
| 127 The same rules apply for downloads: recursive copies of buckets and | |
| 128 bucket subdirectories produce a mirrored filename structure, while copying | |
| 129 individually (or wildcard) named objects produces flatly named files, | |
| 130 | |
| 131 Note that in the above example the '**' wildcard matches all names | |
| 132 as illustrated below. The '**' wildcard in the above example matches all | |
| 133 names anywhere under dir1/dir2, while the '*' wildcard matches names just | |
| 134 one level deep. For more details see 'gsutil help wildcards'. | |
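| | |
| To illustrate the download naming rules (assuming gs://my_bucket contains | |
| an object named dir2/a/b/c), the recursive command: | |
| | |
| gsutil cp -R gs://my_bucket/dir2 . | |
| | |
| produces the mirrored file ./dir2/a/b/c, while the wildcard command: | |
| | |
| gsutil cp gs://my_bucket/dir2/** . | |
| | |
| produces the flatly named file ./c. | |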
| 134 | |
| 135 There's an additional wrinkle when working with subdirectories: the resulting | |
| 136 names depend on whether the destination subdirectory exists. For example, | |
| 137 if gs://my_bucket/subdir exists as a subdirectory, the command: | |
| 138 | |
| 139 gsutil cp -R dir1/dir2 gs://my_bucket/subdir | |
| 140 | |
| 141 will create objects named like gs://my_bucket/subdir/dir2/a/b/c. In contrast, | |
| 142 if gs://my_bucket/subdir does not exist, this same gsutil cp command will | |
| 143 create objects named like gs://my_bucket/subdir/a/b/c. | |
| 144 | |
| 145 | |
| 146 <B>COPYING TO/FROM SUBDIRECTORIES; DISTRIBUTING TRANSFERS ACROSS MACHINES</B> | |
| 147 You can use gsutil to copy to and from subdirectories by using a command like: | |
| 148 | |
| 149 gsutil cp -R dir gs://my_bucket/data | |
| 150 | |
| 151 This will cause dir and all of its files and nested subdirectories to be | |
| 152 copied under the specified destination, resulting in objects with names like | |
| 153 gs://my_bucket/data/dir/a/b/c. Similarly you can download from bucket | |
| 154 subdirectories by using a command like: | |
| 155 | |
| 156 gsutil cp -R gs://my_bucket/data dir | |
| 157 | |
| 158 This will cause everything nested under gs://my_bucket/data to be downloaded | |
| 159 into dir, resulting in files with names like dir/data/a/b/c. | |
| 160 | |
| 161 Copying subdirectories is useful if you want to add data to an existing | |
| 162 bucket directory structure over time. It's also useful if you want | |
| 163 to parallelize uploads and downloads across multiple machines (often | |
| 164 reducing overall transfer time compared with simply running gsutil -m | |
| 165 cp on one machine). For example, if your bucket contains this structure: | |
| 166 | |
| 167 gs://my_bucket/data/result_set_01/ | |
| 168 gs://my_bucket/data/result_set_02/ | |
| 169 ... | |
| 170 gs://my_bucket/data/result_set_99/ | |
| 171 | |
| 172 you could perform concurrent downloads across 3 machines by running these | |
| 173 commands on each machine, respectively: | |
| 174 | |
| 175 gsutil -m cp -R gs://my_bucket/data/result_set_[0-3]* dir | |
| 176 gsutil -m cp -R gs://my_bucket/data/result_set_[4-6]* dir | |
| 177 gsutil -m cp -R gs://my_bucket/data/result_set_[7-9]* dir | |
| 178 | |
| 179 Note that dir could be a local directory on each machine, or it could | |
| 180 be a directory mounted off of a shared file server; whether the latter | |
| 181 performs acceptably may depend on a number of things, so we recommend | |
| 182 you experiment and find out what works best for you. | |
| 183 | |
| 184 | |
| 185 <B>COPYING IN THE CLOUD AND METADATA PRESERVATION</B> | |
| 186 If both the source and destination URI are cloud URIs from the same | |
| 187 provider, gsutil copies data "in the cloud" (i.e., without downloading | |
| 188 to and uploading from the machine where you run gsutil). In addition to | |
| 189 the performance and cost advantages of doing this, copying in the cloud | |
| 190 preserves metadata (like Content-Type and Cache-Control). In contrast, | |
| 191 when you download data from the cloud it ends up in a file, which has | |
| 192 no associated metadata. Thus, unless you have some way to hold on to | |
| 193 or re-create that metadata, downloading to a file will not retain the | |
| 194 metadata. | |
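| | |
| For example (bucket names illustrative), the in-the-cloud copy: | |
| | |
| gsutil cp gs://bucket1/obj gs://bucket2/obj | |
| | |
| carries the source object's Content-Type and Cache-Control to the new | |
| object without downloading or re-uploading the data. | |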
| 195 | |
| 196 Note that by default, the gsutil cp command does not copy the object | |
| 197 ACL to the new object, and instead will use the default bucket ACL (see | |
| 198 "gsutil help setdefacl"). You can override this behavior with the -p | |
| 199 option (see OPTIONS below). | |
| 200 | |
| 201 gsutil does not preserve metadata when copying objects between providers. | |
| 202 | |
| 203 | |
| 204 <B>RESUMABLE TRANSFERS</B> | |
| 205 gsutil automatically uses the Google Cloud Storage resumable upload | |
| 206 feature whenever you use the cp command to upload an object that is larger | |
| 207 than 2 MB. You do not need to specify any special command line options | |
| 208 to make this happen. If your upload is interrupted you can restart the | |
| 209 upload by running the same cp command that you ran to start the upload. | |
| 210 | |
| 211 Similarly, gsutil automatically performs resumable downloads (using HTTP | |
| 212 standard Range GET operations) whenever you use the cp command to download an | |
| 213 object larger than 2 MB. | |
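| | |
| The 2 MB cutoff is the default of the 'resumable_threshold' boto config | |
| value (in bytes), which you can override in your .boto file, e.g. | |
| (illustrative value): | |
| | |
| [GSUtil] | |
| resumable_threshold = 8388608 | |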
| 214 | |
| 215 Resumable uploads and downloads store some state information in a file | |
| 216 in ~/.gsutil named by the destination object or file. If you attempt to | |
| 217 resume a transfer from a different machine, or to a different destination | |
| 218 path, the transfer will start over from scratch. | |
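| | |
| As an illustration (the actual file name is additionally hashed), an | |
| interrupted upload to gs://bucket/obj keeps its state in a ~/.gsutil file | |
| whose name is derived from 'resumable_upload__bucket__obj.url', so | |
| re-running the same cp command finds that state file and resumes. | |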
| 219 | |
| 220 See also "gsutil help prod" for details on using resumable transfers | |
| 221 in production. | |
| 222 | |
| 223 | |
| 224 <B>STREAMING TRANSFERS</B> | |
| 225 Use '-' in place of src_uri or dst_uri to perform a streaming | |
| 226 transfer. For example: | |
| 227 long_running_computation | gsutil cp - gs://my_bucket/obj | |
| 228 | |
| 229 Streaming transfers do not support resumable uploads/downloads. | |
| 230 (The Google resumable transfer protocol has a way to support streaming | |
| 231 transfers, but gsutil doesn't currently implement support for this.) | |
| 232 | |
| 233 | |
| 234 <B>CHANGING TEMP DIRECTORIES</B> | |
| 235 gsutil writes data to a temporary directory in several cases: | |
| 236 - when compressing data to be uploaded (see the -z option) | |
| 237 - when decompressing data being downloaded (when the data has | |
| 238 Content-Encoding:gzip, e.g., as happens when uploaded using gsutil cp -z) | |
| 239 - when running integration tests (using the gsutil test command) | |
| 240 | |
| 241 In these cases the temp file location on your system that gsutil | |
| 242 selects by default may not have enough space. If you find that | |
| 243 gsutil runs out of space during one of these operations (e.g., raising | |
| 244 "CommandException: Inadequate temp space available to compress <your file>" | |
| 245 during a gsutil cp -z operation), you can change where it writes these | |
| 246 temp files by setting the TMPDIR environment variable. On Linux and MacOS | |
| 247 you can do this either by running gsutil this way: | |
| 248 | |
| 249 TMPDIR=/some/directory gsutil cp ... | |
| 250 | |
| 251 or by adding this line to your ~/.bashrc file and then restarting the shell | |
| 252 before running gsutil: | |
| 253 | |
| 254 export TMPDIR=/some/directory | |
| 255 | |
| 256 On Windows 7 you can change the TMPDIR environment variable from Start -> | |
| 257 Computer -> System -> Advanced System Settings -> Environment Variables. | |
| 258 You need to reboot after making this change for it to take effect. (Rebooting | |
| 259 is not necessary after running the export command on Linux and MacOS.) | |
| 260 | |
| 261 | |
| 262 <B>OPTIONS</B> | |
| 263 -a canned_acl Sets named canned_acl when uploaded objects are created. See | |
| 264 'gsutil help acls' for further details. | |
| 265 | |
| 266 -c If an error occurs, continue to attempt to copy the remaining | |
| 267 files. | |
| 268 | |
| 269 -D Copy in "daisy chain" mode, i.e., copying between two buckets by | |
| 270 hooking a download to an upload, via the machine where gsutil is | |
| 271 run. By default, data are copied between two buckets "in the | |
| 272 cloud", i.e., without needing to copy via the machine where | |
| 273 gsutil runs. However, copy-in-the-cloud is not supported when | |
| 274 copying between different locations (like US and EU) or between | |
| 275 different storage classes (like STANDARD and | |
| 276 DURABLE_REDUCED_AVAILABILITY). For these cases, you can use the | |
| 277 -D option to copy data between buckets. | |
| 278 Note: Daisy chain mode is automatically used when copying | |
| 279 between providers (e.g., to copy data from Google Cloud Storage | |
| 280 to another provider). | |
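| | |
| For example, to copy between buckets in different locations you could | |
| run (bucket names illustrative): | |
| | |
| gsutil cp -D gs://my-us-bucket/obj gs://my-eu-bucket | |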
| 281 | |
| 282 -e Exclude symlinks. When specified, symbolic links will not be | |
| 283 copied. | |
| 284 | |
| 285 -n No-clobber. When specified, existing files or objects at the | |
| 286 destination will not be overwritten. Any items that are skipped | |
| 287 by this option will be reported as being skipped. This option | |
| 288 will perform an additional HEAD request to check if an item | |
| 289 exists before attempting to upload the data. This will save | |
| 290 retransmitting data, but the additional HTTP requests may make | |
| 291 small object transfers slower and more expensive. | |
| 292 | |
| 293 This option can be combined with the -c option to build a script | |
| 294 that copies a large number of objects, allowing retries when | |
| 295 some failures occur from which gsutil doesn't automatically | |
| 296 recover, using a bash script like the following: | |
| 297 | |
| 298 status=1 | |
| 299 while [ $status -ne 0 ] ; do | |
| 300 gsutil cp -c -n -R ./dir gs://bucket | |
| 301 status=$? | |
| 302 done | |
| 303 | |
| 304 The -c option will cause copying to continue after failures | |
| 305 occur, and the -n option will cause objects already copied to be | |
| 306 skipped on subsequent iterations. The loop will continue running | |
| 307 as long as gsutil exits with a non-zero status (such a status | |
| 308 indicates there was at least one failure during the gsutil run). | |
| 309 | |
| 310 -p Causes ACLs to be preserved when copying in the cloud. Note that | |
| 311 this option has performance and cost implications, because it | |
| 312 is essentially performing three requests (getacl, cp, setacl). | |
| 313 (The performance issue can be mitigated to some degree by | |
| 314 using gsutil -m cp to cause parallel copying.) | |
| 315 | |
| 316 You can avoid the additional performance and cost of using cp -p | |
| 317 if you want all objects in the destination bucket to end up with | |
| 318 the same ACL by setting a default ACL on that bucket instead of | |
| 319 using cp -p. See "gsutil help setdefacl". | |
| 320 | |
| 321 Note that it's not valid to specify both the -a and -p options | |
| 322 together. | |
| 323 | |
| 324 -q Causes copies to be performed quietly, i.e., without reporting | |
| 325 progress indicators of files being copied. Errors are still | |
| 326 reported. This option can be useful for running gsutil from a | |
| 327 cron job that logs its output to a file, for which the only | |
| 328 information desired in the log is failures. | |
| 329 | |
| 330 -R, -r Causes directories, buckets, and bucket subdirectories to be | |
| 331 copied recursively. If you neglect to use this option for | |
| 332 an upload, gsutil will copy any files it finds and skip any | |
| 333 directories. Similarly, neglecting to specify -R for a download | |
| 334 will cause gsutil to copy any objects at the current bucket | |
| 335 directory level, and skip any subdirectories. | |
| 336 | |
| 337 -v Requests that the version-specific URI for each uploaded object | |
| 338 be printed. Given this URI you can make future upload requests | |
| 339 that are safe in the face of concurrent updates, because Google | |
| 340 Cloud Storage will refuse to perform the update if the current | |
| 341 object version doesn't match the version-specific URI. See | |
| 342 'gsutil help versioning' for more details. Note: at present this | |
| 343 option does not work correctly for objects copied "in the cloud" | |
| 344 (e.g., gsutil cp gs://bucket/obj1 gs://bucket/obj2). | |
| 345 | |
| 346 -z ext1,... Compresses file uploads with the given extensions. If you are | |
| 347 uploading a large file with compressible content, such as | |
| 348 a .js, .css, or .html file, you can gzip-compress the file | |
| 349 during the upload process by specifying the -z <extensions> | |
| 350 option. Compressing data before upload saves on usage charges | |
| 351 because you are uploading a smaller amount of data. | |
| 352 | |
| 353 When you specify the -z option, the data from your files is | |
| 354 compressed before it is uploaded, but your actual files are left | |
| 355 uncompressed on the local disk. The uploaded objects retain the | |
| 356 content type and name of the original files, but are given a | |
| 357 Content-Encoding header with the value "gzip" to indicate that | |
| 358 the object data are stored compressed on the | |
| 359 Google Cloud Storage servers. | |
| 360 | |
| 361 For example, the following command: | |
| 362 | |
| 363 gsutil cp -z html -a public-read cattypes.html gs://mycats | |
| 364 | |
| 365 will do all of the following: | |
| 366 - Upload as the object gs://mycats/cattypes.html (cp command) | |
| 367 - Set the Content-Type to text/html (based on file extension) | |
| 368 - Compress the data in the file cattypes.html (-z option) | |
| 369 - Set the Content-Encoding to gzip (-z option) | |
| 370 - Set the ACL to public-read (-a option) | |
| 371 - If a user tries to view cattypes.html in a browser, the | |
| 372 browser will know to uncompress the data based on the | |
| 373 Content-Encoding header, and to render it as HTML based on | |
| 374 the Content-Type header. | |
| 375 """) | |
| 376 | |
| 377 class CpCommand(Command): | |
| 378 """ | |
| 379 Implementation of gsutil cp command. | |
| 380 | |
| 381 Note that CpCommand is run for both gsutil cp and gsutil mv. The latter | |
| 382 happens by MvCommand calling CpCommand and passing the hidden (undocumented) | |
| 383 -M option. This allows the copy and remove needed for each mv to run | |
| 384 together (rather than first running all the cp's and then all the rm's, as | |
| 385 we originally had implemented), which in turn avoids the following problem | |
| 386 with removing the wrong objects: starting with a bucket containing only | |
| 387 the object gs://bucket/obj, say the user does: | |
| 388 gsutil mv gs://bucket/* gs://bucket/d.txt | |
| 389 If we ran all the cp's and then all the rm's and we didn't expand the wildcard | |
| 390 first, the cp command would first copy gs://bucket/obj to gs://bucket/d.txt, | |
| 391 and the rm command would then remove that object. In the implementation | |
| 392 prior to gsutil release 3.12 we avoided this by building a list of objects | |
| 393 to process and then running the copies and then the removes; but building | |
| 394 the list up front limits scalability (compared with the current approach | |
| 395 of processing the bucket listing iterator on the fly). | |
| 396 """ | |
| 397 | |
| 398 # Set default Content-Type. | |
| 399 DEFAULT_CONTENT_TYPE = 'application/octet-stream' | |
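| # USE_MAGICFILE switches content type detection to the 'file --mime-type' | |
| # command (see _SetContentTypeHeader); it can be enabled in the boto config | |
| # file with, e.g., use_magicfile = True under [GSUtil]. | |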
| 400 USE_MAGICFILE = boto.config.getbool('GSUtil', 'use_magicfile', False) | |
| 401 | |
| 402 # Command specification (processed by parent class). | |
| 403 command_spec = { | |
| 404 # Name of command. | |
| 405 COMMAND_NAME : 'cp', | |
| 406 # List of command name aliases. | |
| 407 COMMAND_NAME_ALIASES : ['copy'], | |
| 408 # Min number of args required by this command. | |
| 409 MIN_ARGS : 1, | |
| 410 # Max number of args required by this command, or NO_MAX. | |
| 411 MAX_ARGS : NO_MAX, | |
| 412 # Getopt-style string specifying acceptable sub args. | |
| 413 # -t is deprecated but leave intact for now to avoid breakage. | |
| 414 SUPPORTED_SUB_ARGS : 'a:cDeIMNnpqrRtvz:', | |
| 415 # True if file URIs acceptable for this command. | |
| 416 FILE_URIS_OK : True, | |
| 417 # True if provider-only URIs acceptable for this command. | |
| 418 PROVIDER_URIS_OK : False, | |
| 419 # Index in args of first URI arg. | |
| 420 URIS_START_ARG : 0, | |
| 421 # True if must configure gsutil before running command. | |
| 422 CONFIG_REQUIRED : True, | |
| 423 } | |
| 424 help_spec = { | |
| 425 # Name of command or auxiliary help info for which this help applies. | |
| 426 HELP_NAME : 'cp', | |
| 427 # List of help name aliases. | |
| 428 HELP_NAME_ALIASES : ['copy'], | |
| 429 # Type of help: | |
| 430 HELP_TYPE : HelpType.COMMAND_HELP, | |
| 431 # One line summary of this help. | |
| 432 HELP_ONE_LINE_SUMMARY : 'Copy files and objects', | |
| 433 # The full help text. | |
| 434 HELP_TEXT : _detailed_help_text, | |
| 435 } | |
| 436 | |
| 437 def _CheckFinalMd5(self, key, file_name): | |
| 438 """ | |
| 439 Checks that etag from server agrees with md5 computed after the | |
| 440 download completes. | |
| 441 """ | |
| 442 obj_md5 = key.etag.strip('"\'') | |
| 443 file_md5 = None | |
| 444 | |
| 445 if hasattr(key, 'md5') and key.md5: | |
| 446 file_md5 = key.md5 | |
| 447 else: | |
| 448 print 'Computing MD5 from scratch for resumed download' | |
| 449 | |
| 450 # Open file in binary mode to avoid surprises in Windows. | |
| 451 fp = open(file_name, 'rb') | |
| 452 try: | |
| 453 file_md5 = key.compute_md5(fp)[0] | |
| 454 finally: | |
| 455 fp.close() | |
| 456 | |
| 457 if self.debug: | |
| 458 print 'Checking file md5 against etag. (%s/%s)' % (file_md5, obj_md5) | |
| 459 if file_md5 != obj_md5: | |
| 460 # Checksums don't match - remove file and raise exception. | |
| 461 os.unlink(file_name) | |
| 462 raise CommandException( | |
| 463 'File changed during download: md5 signature doesn\'t match ' | |
| 464 'etag (incorrect downloaded file deleted)') | |
| 465 | |
| 466 def _CheckForDirFileConflict(self, exp_src_uri, dst_uri): | |
| 467 """Checks whether copying exp_src_uri into dst_uri is not possible. | |
| 468 | |
| 469 This happens if a directory exists in local file system where a file | |
| 470 needs to go or vice versa. In that case we print an error message and | |
| 471 exits. Example: if the file "./x" exists and you try to do: | |
| 472 gsutil cp gs://mybucket/x/y . | |
| 473 the request can't succeed because it requires a directory where | |
| 474 the file x exists. | |
| 475 | |
| 476 Note that we don't enforce any corresponding restrictions for buckets, | |
| 477 because the flat namespace semantics for buckets doesn't prohibit such | |
| 478 cases the way hierarchical file systems do. For example, if a bucket | |
| 479 contains an object called gs://bucket/dir and then you run the command: | |
| 480 gsutil cp file1 file2 gs://bucket/dir | |
| 481 you'll end up with objects gs://bucket/dir, gs://bucket/dir/file1, and | |
| 482 gs://bucket/dir/file2. | |
| 483 | |
| 484 Args: | |
| 485 exp_src_uri: Expanded source StorageUri of copy. | |
| 486 dst_uri: Destination URI. | |
| 487 | |
| 488 Raises: | |
| 489 CommandException: if errors encountered. | |
| 490 """ | |
| 491 if dst_uri.is_cloud_uri(): | |
| 492 # The problem can only happen for file destination URIs. | |
| 493 return | |
| 494 dst_path = dst_uri.object_name | |
| 495 final_dir = os.path.dirname(dst_path) | |
| 496 if os.path.isfile(final_dir): | |
| 497 raise CommandException('Cannot retrieve %s because a file exists ' | |
| 498 'where a directory needs to be created (%s).' % | |
| 499 (exp_src_uri, final_dir)) | |
| 500 if os.path.isdir(dst_path): | |
| 501 raise CommandException('Cannot retrieve %s because a directory exists ' | |
| 502 '(%s) where the file needs to be created.' % | |
| 503 (exp_src_uri, dst_path)) | |
| 504 | |
| 505 def _InsistDstUriNamesContainer(self, exp_dst_uri, | |
| 506 have_existing_dst_container, command_name): | |
| 507 """ | |
| 508 Raises an exception if URI doesn't name a directory, bucket, or bucket | |
| 509 subdir, with special exception for cp -R (see comments below). | |
| 510 | |
| 511 Args: | |
| 512 exp_dst_uri: Wildcard-expanded dst_uri. | |
| 513 have_existing_dst_container: bool indicator of whether exp_dst_uri | |
| 514 names a container (directory, bucket, or existing bucket subdir). | |
| 515 command_name: Name of command making call. May not be the same as | |
| 516 self.command_name in the case of commands implemented atop other | |
| 517 commands (like mv command). | |
| 518 | |
| 519 Raises: | |
| 520 CommandException: if the URI being checked does not name a container. | |
| 521 """ | |
| 522 if exp_dst_uri.is_file_uri(): | |
| 523 ok = exp_dst_uri.names_directory() | |
| 524 else: | |
| 525 if have_existing_dst_container: | |
| 526 ok = True | |
| 527 else: | |
| 528 # It's ok to specify a non-existing bucket subdir, for example: | |
| 529 # gsutil cp -R dir gs://bucket/abc | |
| 530 # where gs://bucket/abc isn't an existing subdir. | |
| 531 ok = exp_dst_uri.names_object() | |
| 532 if not ok: | |
| 533 raise CommandException('Destination URI must name a directory, bucket, ' | |
| 534 'or bucket\nsubdirectory for the multiple ' | |
| 535 'source form of the %s command.' % command_name) | |
| 536 | |
| 537 class _FileCopyCallbackHandler(object): | |
| 538 """Outputs progress info for large copy requests.""" | |
| 539 | |
| 540 def __init__(self, upload): | |
| 541 if upload: | |
| 542 self.announce_text = 'Uploading' | |
| 543 else: | |
| 544 self.announce_text = 'Downloading' | |
| 545 | |
| 546 def call(self, total_bytes_transferred, total_size): | |
| 547 sys.stderr.write('%s: %s/%s \r' % ( | |
| 548 self.announce_text, | |
| 549 MakeHumanReadable(total_bytes_transferred), | |
| 550 MakeHumanReadable(total_size))) | |
| 551 if total_bytes_transferred == total_size: | |
| 552 sys.stderr.write('\n') | |
| 553 | |
| 554 class _StreamCopyCallbackHandler(object): | |
| 555 """Outputs progress info for Stream copy to cloud. | |
| 556 Total Size of the stream is not known, so we output | |
| 557 only the bytes transferred. | |
| 558 """ | |
| 559 | |
| 560 def call(self, total_bytes_transferred, total_size): | |
| 561 sys.stderr.write('Uploading: %s \r' % ( | |
| 562 MakeHumanReadable(total_bytes_transferred))) | |
| 563 if total_size and total_bytes_transferred == total_size: | |
| 564 sys.stderr.write('\n') | |
| 565 | |
| 566 def _GetTransferHandlers(self, dst_uri, size, upload): | |
| 567 """ | |
| 568 Selects upload/download and callback handlers. | |
| 569 | |
| 570 We use a callback handler that shows a simple textual progress indicator | |
| 571 if size is above the configurable threshold. | |
| 572 | |
| 573 We use a resumable transfer handler if size is >= the configurable | |
| 574 threshold and resumable transfers are supported by the given provider. | |
| 575 boto supports resumable downloads for all providers, but resumable | |
| 576 uploads are currently only supported by GS. | |
| 577 | |
| 578 Args: | |
| 579 dst_uri: the destination URI. | |
| 580 size: size of file (object) being uploaded (downloaded). | |
| 581 upload: bool indication of whether transfer is an upload. | |
| 582 """ | |
| 583 config = boto.config | |
| 584 resumable_threshold = config.getint('GSUtil', 'resumable_threshold', TWO_MB) | |
| 585 transfer_handler = None | |
| 586 cb = None | |
| 587 num_cb = None | |
| 588 | |
| 589 # Checks whether the destination file is a "special" file, like /dev/null on | |
| 590 # Linux platforms or null on Windows platforms, so we can disable resumable | |
| 591 # download support since the file size of the destination won't ever be | |
| 592 # correct. | |
| 593 dst_is_special = False | |
| 594 if dst_uri.is_file_uri(): | |
| 595 # Check explicitly first because os.stat doesn't work on 'nul' in Windows. | |
| 596 if dst_uri.object_name == os.devnull: | |
| 597 dst_is_special = True | |
| 598 try: | |
| 599 mode = os.stat(dst_uri.object_name).st_mode | |
| 600 if stat.S_ISCHR(mode): | |
| 601 dst_is_special = True | |
| 602 except OSError: | |
| 603 pass | |
| 604 | |
| 605 if size >= resumable_threshold and not dst_is_special: | |
| 606 if not self.quiet: | |
| 607 cb = self._FileCopyCallbackHandler(upload).call | |
| 608 num_cb = int(size / TWO_MB) | |
| 609 | |
| 610 resumable_tracker_dir = config.get( | |
| 611 'GSUtil', 'resumable_tracker_dir', | |
| 612 os.path.expanduser('~' + os.sep + '.gsutil')) | |
| 613 if not os.path.exists(resumable_tracker_dir): | |
| 614 os.makedirs(resumable_tracker_dir) | |
| 615 | |
| 616 if upload: | |
| 617 # Encode the dest bucket and object name into the tracker file name. | |
| 618 res_tracker_file_name = ( | |
| 619 re.sub('[/\\\\]', '_', 'resumable_upload__%s__%s.url' % | |
| 620 (dst_uri.bucket_name, dst_uri.object_name))) | |
| 621 else: | |
| 622 # Encode the fully-qualified dest file name into the tracker file name. | |
| 623 res_tracker_file_name = ( | |
| 624 re.sub('[/\\\\]', '_', 'resumable_download__%s.etag' % | |
| 625 (os.path.realpath(dst_uri.object_name)))) | |
| 626 | |
| 627 res_tracker_file_name = _hash_filename(res_tracker_file_name) | |
| 628 tracker_file = '%s%s%s' % (resumable_tracker_dir, os.sep, | |
| 629 res_tracker_file_name) | |
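| # Illustrative examples of the pre-hash names built above: an upload to | |
| # gs://bucket/a/b.txt yields 'resumable_upload__bucket__a_b.txt.url' | |
| # (the re.sub maps path separators to '_'), and a download to /tmp/f | |
| # yields 'resumable_download___tmp_f.etag'. | |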
| 630 if upload: | |
| 631 if dst_uri.scheme == 'gs': | |
| 632 transfer_handler = ResumableUploadHandler(tracker_file) | |
| 633 else: | |
| 634 transfer_handler = ResumableDownloadHandler(tracker_file) | |
| 635 | |
| 636 return (cb, num_cb, transfer_handler) | |
| 637 | |
| 638 def _LogCopyOperation(self, src_uri, dst_uri, headers): | |
| 639 """ | |
| 640 Logs copy operation being performed, including Content-Type if appropriate. | |
| 641 """ | |
| 642 if self.quiet: | |
| 643 return | |
| 644 if 'Content-Type' in headers and dst_uri.is_cloud_uri(): | |
| 645 content_type_msg = ' [Content-Type=%s]' % headers['Content-Type'] | |
| 646 else: | |
| 647 content_type_msg = '' | |
| 648 if src_uri.is_stream(): | |
| 649 self.THREADED_LOGGER.info('Copying from <STDIN>%s...', content_type_msg) | |
| 650 else: | |
| 651 self.THREADED_LOGGER.info('Copying %s%s...', src_uri, content_type_msg) | |
| 652 | |
| 653 # We pass the headers explicitly to this call instead of using self.headers | |
| 654 # so we can set different metadata (like Content-Type) for each object. | |
| 655 def _CopyObjToObjInTheCloud(self, src_key, src_uri, dst_uri, headers): | |
| 656 """Performs copy-in-the cloud from specified src to dest object. | |
| 657 | |
| 658 Args: | |
| 659 src_key: Source Key. | |
| 660 src_uri: Source StorageUri. | |
| 661 dst_uri: Destination StorageUri. | |
| 662 headers: A copy of the headers dictionary. | |
| 663 | |
| 664 Returns: | |
| 665 (elapsed_time, bytes_transferred, dst_uri) excluding overhead like initial | |
| 666 HEAD. Note: At present copy-in-the-cloud doesn't return the generation of | |
| 667 the created object, so the returned URI is actually not version-specific | |
| 668 (unlike other cp cases). | |
| 669 | |
| 670 Raises: | |
| 671 CommandException: if errors encountered. | |
| 672 """ | |
| 673 self._SetContentTypeHeader(src_uri, headers) | |
| 674 self._LogCopyOperation(src_uri, dst_uri, headers) | |
| 675 # Do Object -> object copy within same provider (uses | |
| 676 # x-<provider>-copy-source metadata HTTP header to request copying at the | |
| 677 # server). | |
| 678 src_bucket = src_uri.get_bucket(False, headers) | |
| 679 preserve_acl = False | |
| 680 canned_acl = None | |
| 681 if self.sub_opts: | |
| 682 for o, a in self.sub_opts: | |
| 683 if o == '-a': | |
| 684 canned_acls = dst_uri.canned_acls() | |
| 685 if a not in canned_acls: | |
| 686 raise CommandException('Invalid canned ACL "%s".' % a) | |
| 687 canned_acl = a | |
| 688 headers[dst_uri.get_provider().acl_header] = canned_acl | |
| 689 if o == '-p': | |
| 690 preserve_acl = True | |
| 691 if preserve_acl and canned_acl: | |
| 692 raise CommandException( | |
| 693 'Specifying both the -p and -a options together is invalid.') | |
| 694 start_time = time.time() | |
| 695 # Pass headers in headers param not metadata param, so boto will copy | |
| 696 # existing key's metadata and just set the additional headers specified | |
| 697 # in the headers param (rather than using the headers to override existing | |
| 698 # metadata). In particular this allows us to copy the existing key's | |
| 699 # Content-Type and other metadata users need while still being able to | |
| 700 # set headers the API needs (like x-goog-project-id). Note that this means | |
| 701 # you can't do something like: | |
| 702 # gsutil cp -t Content-Type text/html gs://bucket/* gs://bucket2 | |
| 703 # to change the Content-Type while copying. | |
| 704 | |
| 705 try: | |
| 706 dst_key = dst_uri.copy_key( | |
| 707 src_bucket.name, src_uri.object_name, preserve_acl=preserve_acl, | |
| 708 headers=headers, src_version_id=src_uri.version_id, | |
| 709 src_generation=src_uri.generation) | |
| 710 except GSResponseError as e: | |
| 711 exc_name, error_detail = ExtractErrorDetail(e) | |
| 712 if (exc_name == 'GSResponseError' | |
| 713 and ('Copy-in-the-cloud disallowed' in error_detail)): | |
| 714 raise CommandException('%s.\nNote: you can copy between locations ' | |
| 715 'and between storage classes by using the ' | |
| 716 'gsutil cp -D option.' % error_detail) | |
| 717 else: | |
| 718 raise | |
| 719 end_time = time.time() | |
| 720 return (end_time - start_time, src_key.size, | |
| 721 dst_uri.clone_replace_key(dst_key)) | |
| 722 | |
| 723 def _CheckFreeSpace(self, path): | |
| 724 """Return path/drive free space (in bytes).""" | |
| 725 if platform.system() == 'Windows': | |
| 726 from ctypes import c_char_p, c_int, c_uint64, c_wchar_p, windll, POINTER, WINFUNCTYPE, WinError | |
| 727 try: | |
| 728 GetDiskFreeSpaceEx = WINFUNCTYPE(c_int, c_wchar_p, POINTER(c_uint64), | |
| 729 POINTER(c_uint64), POINTER(c_uint64)) | |
| 730 GetDiskFreeSpaceEx = GetDiskFreeSpaceEx( | |
| 731 ('GetDiskFreeSpaceExW', windll.kernel32), ( | |
| 732 (1, 'lpszPathName'), | |
| 733 (2, 'lpFreeUserSpace'), | |
| 734 (2, 'lpTotalSpace'), | |
| 735 (2, 'lpFreeSpace'),)) | |
| 736 except AttributeError: | |
| 737 GetDiskFreeSpaceEx = WINFUNCTYPE(c_int, c_char_p, POINTER(c_uint64), | |
| 738 POINTER(c_uint64), POINTER(c_uint64)) | |
| 739 GetDiskFreeSpaceEx = GetDiskFreeSpaceEx( | |
| 740 ('GetDiskFreeSpaceExA', windll.kernel32), ( | |
| 741 (1, 'lpszPathName'), | |
| 742 (2, 'lpFreeUserSpace'), | |
| 743 (2, 'lpTotalSpace'), | |
| 744 (2, 'lpFreeSpace'),)) | |
| 745 | |
| 746 def GetDiskFreeSpaceEx_errcheck(result, func, args): | |
| 747 if not result: | |
| 748 raise WinError() | |
| 749 return args[1].value | |
| 750 GetDiskFreeSpaceEx.errcheck = GetDiskFreeSpaceEx_errcheck | |
| 751 | |
| 752 return GetDiskFreeSpaceEx(os.getenv('SystemDrive')) | |
| 753 else: | |
| 754 (_, f_frsize, _, _, f_bavail, _, _, _, _, _) = os.statvfs(path) | |
| 755 return f_frsize * f_bavail | |
| 756 | |
| 757 def _PerformResumableUploadIfApplies(self, fp, dst_uri, canned_acl, headers): | |
| 758 """ | |
| 759 Performs resumable upload if supported by provider and file is above | |
| 760 threshold, else performs non-resumable upload. | |
| 761 | |
| 762 Returns (elapsed_time, bytes_transferred, version-specific dst_uri). | |
| 763 """ | |
| 764 start_time = time.time() | |
| 765 # Determine file size in different ways, depending on whether fp is a | |
| 766 # wrapper around a Key or an actual file. | |
| 767 if isinstance(fp, KeyFile): | |
| 768 file_size = fp.getkey().size | |
| 769 else: | |
| 770 file_size = os.path.getsize(fp.name) | |
| 771 (cb, num_cb, res_upload_handler) = self._GetTransferHandlers( | |
| 772 dst_uri, file_size, True) | |
| 773 if dst_uri.scheme == 'gs': | |
| 774 # Resumable upload protocol is Google Cloud Storage-specific. | |
| 775 dst_uri.set_contents_from_file(fp, headers, policy=canned_acl, | |
| 776 cb=cb, num_cb=num_cb, | |
| 777 res_upload_handler=res_upload_handler) | |
| 778 else: | |
| 779 dst_uri.set_contents_from_file(fp, headers, policy=canned_acl, | |
| 780 cb=cb, num_cb=num_cb) | |
| 781 if res_upload_handler: | |
| 782 # ResumableUploadHandler does not update upload_start_point from its | |
| 783 # initial value of -1 if transferring the whole file, so clamp at 0 | |
| 784 bytes_transferred = file_size - max( | |
| 785 res_upload_handler.upload_start_point, 0) | |
| 786 else: | |
| 787 bytes_transferred = file_size | |
| 788 end_time = time.time() | |
| 789 return (end_time - start_time, bytes_transferred, dst_uri) | |
| 790 | |
| 791 def _PerformStreamingUpload(self, fp, dst_uri, headers, canned_acl=None): | |
| 792 """ | |
| 793 Performs a streaming upload to the cloud. | |
| 794 | |
| 795 Args: | |
| 796 fp: The file whose contents to upload. | |
| 797 dst_uri: Destination StorageUri. | |
| 798 headers: A copy of the headers dictionary. | |
| 799 canned_acl: Optional canned ACL to set on the object. | |
| 800 | |
| 801 Returns (elapsed_time, bytes_transferred, version-specific dst_uri). | |
| 802 """ | |
| 803 start_time = time.time() | |
| 804 | |
| 805 if self.quiet: | |
| 806 cb = None | |
| 807 else: | |
| 808 cb = self._StreamCopyCallbackHandler().call | |
| 809 dst_uri.set_contents_from_stream( | |
| 810 fp, headers, policy=canned_acl, cb=cb) | |
| 811 try: | |
| 812 bytes_transferred = fp.tell() | |
| 813 except Exception: | |
| 814 bytes_transferred = 0 | |
| 815 | |
| 816 end_time = time.time() | |
| 817 return (end_time - start_time, bytes_transferred, dst_uri) | |
| 818 | |
| 819 def _SetContentTypeHeader(self, src_uri, headers): | |
| 820 """ | |
| 821 Sets content type header to value specified in '-h Content-Type' option (if | |
| 822 specified); else sets using Content-Type detection. | |
| 823 """ | |
| 824 if 'Content-Type' in headers: | |
| 825 # If empty string specified (i.e., -h "Content-Type:") set header to None, | |
| 826 # which will inhibit boto from sending the CT header. Otherwise, boto will | |
| 827 # pass through the user specified CT header. | |
| 828 if not headers['Content-Type']: | |
| 829 headers['Content-Type'] = None | |
| 830 # else we'll keep the value passed in via -h option (not performing | |
| 831 # content type detection). | |
| 832 else: | |
| 833 # Only do content type recognition if src_uri is a file. Object-to-object | |
| 834 # copies with no -h Content-Type specified re-use the content type of the | |
| 835 # source object. | |
| 836 if src_uri.is_file_uri(): | |
| 837 object_name = src_uri.object_name | |
| 838 content_type = None | |
| 839 # Streams (denoted by '-') are expected to be 'application/octet-stream' | |
| 840 # and 'file' would partially consume them. | |
| 841 if object_name != '-': | |
| 842 if self.USE_MAGICFILE: | |
| 843 p = subprocess.Popen(['file', '--mime-type', object_name], | |
| 844 stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
| 845 output, error = p.communicate() | |
| 846 if p.returncode != 0 or error: | |
| 847 raise CommandException( | |
| 848 'Encountered error running "file --mime-type %s" ' | |
| 849 '(returncode=%d).\n%s' % (object_name, p.returncode, error)) | |
| 850 # Parse output by removing the line delimiter and splitting on the last ': '. | |
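| # For example, output 'cattypes.html: text/html\n' parses to 'text/html'. | |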
| 851 content_type = output.rstrip().rpartition(': ')[2] | |
| 852 else: | |
| 853 content_type = mimetypes.guess_type(object_name)[0] | |
| 854 if not content_type: | |
| 855 content_type = self.DEFAULT_CONTENT_TYPE | |
| 856 headers['Content-Type'] = content_type | |
| 857 | |
| 858 def _UploadFileToObject(self, src_key, src_uri, dst_uri, headers, | |
| 859 should_log=True): | |
| 860 """Uploads a local file to an object. | |
| 861 | |
| 862 Args: | |
| 863 src_key: Source Key. Must be a local file Key or stream. | |
| 864 src_uri: Source StorageUri. | |
| 865 dst_uri: Destination StorageUri. | |
| 866 headers: The headers dictionary. | |
| 867 should_log: bool indicator whether we should log this operation. | |
| 868 Returns: | |
| 869 (elapsed_time, bytes_transferred, version-specific dst_uri), excluding | |
| 870 overhead like initial HEAD. | |
| 871 | |
| 872 Raises: | |
| 873 CommandException: if errors encountered. | |
| 874 """ | |
| 875 gzip_exts = [] | |
| 876 canned_acl = None | |
| 877 if self.sub_opts: | |
| 878 for o, a in self.sub_opts: | |
| 879 if o == '-a': | |
| 880 canned_acls = dst_uri.canned_acls() | |
| 881 if a not in canned_acls: | |
| 882 raise CommandException('Invalid canned ACL "%s".' % a) | |
| 883 canned_acl = a | |
| 884 elif o == '-t': | |
| 885 print('Warning: -t is deprecated, and will be removed in the future. ' | |
| 886 'Content type\ndetection is ' | |
| 887 'now performed by default, unless inhibited by specifying ' | |
| 888 'a\nContent-Type header via the -h option.') | |
| 889 elif o == '-z': | |
| 890 gzip_exts = a.split(',') | |
| 891 | |
| 892 self._SetContentTypeHeader(src_uri, headers) | |
| 893 if should_log: | |
| 894 self._LogCopyOperation(src_uri, dst_uri, headers) | |
| 895 | |
| 896 if 'Content-Language' not in headers: | |
| 897 content_language = config.get_value('GSUtil', 'content_language') | |
| 898 if content_language: | |
| 899 headers['Content-Language'] = content_language | |
| 900 | |
| 901 fname_parts = src_uri.object_name.split('.') | |
| 902 if len(fname_parts) > 1 and fname_parts[-1] in gzip_exts: | |
| 903 if self.debug: | |
| 904 print 'Compressing %s (to tmp)...' % src_key | |
| 905 (gzip_fh, gzip_path) = tempfile.mkstemp() | |
| 906 gzip_fp = None | |
| 907 try: | |
| 908 # Check for temp space. Assume the compressed object is at most 2x | |
| 909 # the size of the object (normally should compress to smaller than | |
| 910 # the object) | |
| 911 if (self._CheckFreeSpace(gzip_path) | |
| 912 < 2*int(os.path.getsize(src_key.name))): | |
| 913 raise CommandException('Inadequate temp space available to compress ' | |
| 914 '%s' % src_key.name) | |
| 915 gzip_fp = gzip.open(gzip_path, 'wb') | |
| 916 gzip_fp.writelines(src_key.fp) | |
| 917 finally: | |
| 918 if gzip_fp: | |
| 919 gzip_fp.close() | |
| 920 os.close(gzip_fh) | |
| 921 headers['Content-Encoding'] = 'gzip' | |
| 922 gzip_fp = open(gzip_path, 'rb') | |
| 923 try: | |
| 924 (elapsed_time, bytes_transferred, result_uri) = ( | |
| 925 self._PerformResumableUploadIfApplies(gzip_fp, dst_uri, | |
| 926 canned_acl, headers)) | |
| 927 finally: | |
| 928 gzip_fp.close() | |
| 929 try: | |
| 930 os.unlink(gzip_path) | |
| 931 except Exception: | |
| 932 # Windows sometimes complains the temp file is locked when you try | |
| 933 # to delete it, so ignore deletion failures. | |
| 934 pass | |
| 935 elif (src_key.is_stream() | |
| 936 and dst_uri.get_provider().supports_chunked_transfer()): | |
| 937 (elapsed_time, bytes_transferred, result_uri) = ( | |
| 938 self._PerformStreamingUpload(src_key.fp, dst_uri, headers, | |
| 939 canned_acl)) | |
| 940 else: | |
| 941 if src_key.is_stream(): | |
| 942 # For providers that don't support chunked transfers, stage in a temp file. | |
| 943 tmp = tempfile.NamedTemporaryFile() | |
| 944 file_uri = self.suri_builder.StorageUri('file://%s' % tmp.name) | |
| 945 try: | |
| 946 file_uri.new_key(False, headers).set_contents_from_file( | |
| 947 src_key.fp, headers) | |
| 948 src_key = file_uri.get_key() | |
| 949 finally: | |
| 950 file_uri.close() | |
| 951 try: | |
| 952 (elapsed_time, bytes_transferred, result_uri) = ( | |
| 953 self._PerformResumableUploadIfApplies(src_key.fp, dst_uri, | |
| 954 canned_acl, headers)) | |
| 955 finally: | |
| 956 if src_key.is_stream(): | |
| 957 tmp.close() | |
| 958 else: | |
| 959 src_key.close() | |
| 960 | |
| 961 return (elapsed_time, bytes_transferred, result_uri) | |
| 962 | |
| 963 def _DownloadObjectToFile(self, src_key, src_uri, dst_uri, headers, | |
| 964 should_log=True): | |
| 965 """Downloads an object to a local file. | |
| 966 | |
| 967 Args: | |
| 968 src_key: Source Key (the cloud object being downloaded). | |
| 969 src_uri: Source StorageUri. | |
| 970 dst_uri: Destination StorageUri. | |
| 971 headers: The headers dictionary. | |
| 972 should_log: bool indicator whether we should log this operation. | |
| 973 Returns: | |
| 974 (elapsed_time, bytes_transferred, dst_uri), excluding overhead like | |
| 975 initial HEAD. | |
| 976 | |
| 977 Raises: | |
| 978 CommandException: if errors encountered. | |
| 979 """ | |
| 980 if should_log: | |
| 981 self._LogCopyOperation(src_uri, dst_uri, headers) | |
| 982 (cb, num_cb, res_download_handler) = self._GetTransferHandlers( | |
| 983 dst_uri, src_key.size, False) | |
| 984 file_name = dst_uri.object_name | |
| 985 dir_name = os.path.dirname(file_name) | |
| 986 if dir_name and not os.path.exists(dir_name): | |
| 987 # Do dir creation in try block so can ignore case where dir already | |
| 988 # exists. This is needed to avoid a race condition when running gsutil | |
| 989 # -m cp. | |
| 990 try: | |
| 991 os.makedirs(dir_name) | |
| 992 except OSError, e: | |
| 993 if e.errno != errno.EEXIST: | |
| 994 raise | |
| 995 # For gzipped objects not named *.gz download to a temp file and unzip. | |
| 996 if (hasattr(src_key, 'content_encoding') | |
| 997 and src_key.content_encoding == 'gzip' | |
| 998 and not file_name.endswith('.gz')): | |
| 999 # We can't use tempfile.mkstemp() here because we need a predictable | |
| 1000 # filename for resumable downloads. | |
| 1001 download_file_name = '%s_.gztmp' % file_name | |
| 1002 need_to_unzip = True | |
| 1003 else: | |
| 1004 download_file_name = file_name | |
| 1005 need_to_unzip = False | |
| 1006 fp = None | |
| 1007 try: | |
| 1008 if res_download_handler: | |
| 1009 fp = open(download_file_name, 'ab') | |
| 1010 else: | |
| 1011 fp = open(download_file_name, 'wb') | |
| 1012 start_time = time.time() | |
| 1013 src_key.get_contents_to_file(fp, headers, cb=cb, num_cb=num_cb, | |
| 1014 res_download_handler=res_download_handler) | |
| 1015 # If a custom test method is defined, call it here. For the copy command, | |
| 1016 # test methods are expected to take one argument: an open file pointer, | |
| 1017 # and are used to perturb the open file during download to exercise | |
| 1018 # download error detection. | |
| 1019 if self.test_method: | |
| 1020 self.test_method(fp) | |
| 1021 end_time = time.time() | |
| 1022 finally: | |
| 1023 if fp: | |
| 1024 fp.close() | |
| 1025 | |
| 1026 # Discard the md5 if we are resuming a partial download. | |
| 1027 if res_download_handler and res_download_handler.download_start_point: | |
| 1028 src_key.md5 = None | |
| 1029 | |
| 1030 # Verify downloaded file checksum matched source object's checksum. | |
| 1031 self._CheckFinalMd5(src_key, download_file_name) | |
| 1032 | |
| 1033 if res_download_handler: | |
| 1034 bytes_transferred = ( | |
| 1035 src_key.size - res_download_handler.download_start_point) | |
| 1036 else: | |
| 1037 bytes_transferred = src_key.size | |
| 1038 if need_to_unzip: | |
| 1039 # Log that we're uncompressing if the file is big enough that | |
| 1040 # decompressing would make it look like the transfer "stalled" at the end. | |
| 1041 if not self.quiet and bytes_transferred > 10 * 1024 * 1024: | |
| 1042 self.THREADED_LOGGER.info('Uncompressing downloaded tmp file to %s...', | |
| 1043 file_name) | |
| 1044 # Downloaded gzipped file to a filename w/o .gz extension, so unzip. | |
| 1045 f_in = gzip.open(download_file_name, 'rb') | |
| 1046 f_out = open(file_name, 'wb') | |
| 1047 try: | |
| 1048 while True: | |
| 1049 data = f_in.read(8192) | |
| 1050 if not data: | |
| 1051 break | |
| 1052 f_out.write(data) | |
| 1053 finally: | |
| 1054 f_out.close() | |
| 1055 f_in.close() | |
| 1056 os.unlink(download_file_name) | |
| 1057 return (end_time - start_time, bytes_transferred, dst_uri) | |
| 1058 | |
| 1059 def _PerformDownloadToStream(self, src_key, src_uri, str_fp, headers): | |
| 1060 (cb, num_cb, res_download_handler) = self._GetTransferHandlers( | |
| 1061 src_uri, src_key.size, False) | |
| 1062 start_time = time.time() | |
| 1063 src_key.get_contents_to_file(str_fp, headers, cb=cb, num_cb=num_cb) | |
| 1064 end_time = time.time() | |
| 1065 bytes_transferred = src_key.size | |
| 1066 | |
| 1067 return (end_time - start_time, bytes_transferred) | |
| 1068 | |
| 1069 def _CopyFileToFile(self, src_key, src_uri, dst_uri, headers): | |
| 1070 """Copies a local file to a local file. | |
| 1071 | |
| 1072 Args: | |
| 1073 src_key: Source Key. Must be a local file Key. | |
| 1074 src_uri: Source StorageUri. | |
| 1075 dst_uri: Destination StorageUri. | |
| 1076 headers: The headers dictionary. | |
| 1077 Returns: | |
| 1078 (elapsed_time, bytes_transferred, dst_uri), excluding | |
| 1079 overhead like initial HEAD. | |
| 1080 | |
| 1081 Raises: | |
| 1082 CommandException: if errors encountered. | |
| 1083 """ | |
| 1084 self._LogCopyOperation(src_uri, dst_uri, headers) | |
| 1085 dst_key = dst_uri.new_key(False, headers) | |
| 1086 start_time = time.time() | |
| 1087 dst_key.set_contents_from_file(src_key.fp, headers) | |
| 1088 end_time = time.time() | |
| 1089 return (end_time - start_time, os.path.getsize(src_key.fp.name), dst_uri) | |
| 1090 | |
| 1091 def _CopyObjToObjDaisyChainMode(self, src_key, src_uri, dst_uri, headers): | |
| 1092 """Copies from src_uri to dst_uri in "daisy chain" mode. | |
| 1093 See -D OPTION documentation about what daisy chain mode is. | |
| 1094 | |
| 1095 Args: | |
| 1096 src_key: Source Key. | |
| 1097 src_uri: Source StorageUri. | |
| 1098 dst_uri: Destination StorageUri. | |
| 1099 headers: A copy of the headers dictionary. | |
| 1100 | |
| 1101 Returns: | |
| 1102 (elapsed_time, bytes_transferred, version-specific dst_uri) excluding | |
| 1103 overhead like initial HEAD. | |
| 1104 | |
| 1105 Raises: | |
| 1106 CommandException: if errors encountered. | |
| 1107 """ | |
| 1108 self._SetContentTypeHeader(src_uri, headers) | |
| 1109 self._LogCopyOperation(src_uri, dst_uri, headers) | |
| 1110 canned_acl = None | |
| 1111 if self.sub_opts: | |
| 1112 for o, a in self.sub_opts: | |
| 1113 if o == '-a': | |
| 1114 canned_acls = dst_uri.canned_acls() | |
| 1115 if a not in canned_acls: | |
| 1116 raise CommandException('Invalid canned ACL "%s".' % a) | |
| 1117 canned_acl = a | |
| 1118 elif o == '-p': | |
| 1119 # We don't attempt to preserve ACLs across providers because | |
| 1120 # GCS and S3 support different ACLs and disjoint principals. | |
| 1121 raise NotImplementedError('Cross-provider cp -p not supported') | |
| 1122 return self._PerformResumableUploadIfApplies(KeyFile(src_key), dst_uri, | |
| 1123 canned_acl, headers) | |
| 1124 | |
| 1125 def _PerformCopy(self, src_uri, dst_uri): | |
| 1126 """Performs copy from src_uri to dst_uri, handling various special cases. | |
| 1127 | |
| 1128 Args: | |
| 1129 src_uri: Source StorageUri. | |
| 1130 dst_uri: Destination StorageUri. | |
| 1131 | |
| 1132 Returns: | |
| 1133 (elapsed_time, bytes_transferred, version-specific dst_uri) excluding | |
| 1134 overhead like initial HEAD. | |
| 1135 | |
| 1136 Raises: | |
| 1137 CommandException: if errors encountered. | |
| 1138 """ | |
| 1139 # Make a copy of the input headers each time so we can set a different | |
| 1140 # content type for each object. | |
| 1141 if self.headers: | |
| 1142 headers = self.headers.copy() | |
| 1143 else: | |
| 1144 headers = {} | |
| 1145 | |
| 1146 src_key = src_uri.get_key(False, headers) | |
| 1147 if not src_key: | |
| 1148 raise CommandException('"%s" does not exist.' % src_uri) | |
| 1149 | |
| 1150 # On Windows, stdin is opened as text mode instead of binary which causes | |
| 1151 # problems when piping a binary file, so this switches it to binary mode. | |
| 1152 if IS_WINDOWS and src_uri.is_file_uri() and src_key.is_stream(): | |
| 1153 import msvcrt | |
| 1154 msvcrt.setmode(src_key.fp.fileno(), os.O_BINARY) | |
| 1155 | |
| 1156 if self.no_clobber: | |
| 1157 # There are two checks to prevent clobbering: | |
| 1158 # 1) The first check is to see if the item | |
| 1159 # already exists at the destination and prevent the upload/download | |
| 1160 # from happening. This is done by the exists() call. | |
| 1161 # 2) The second check is only relevant if we are writing to gs. We can | |
| 1162 # enforce that the server only writes the object if it doesn't exist | |
| 1163 # by specifying the header below. This check only happens at the | |
| 1164 # server after the complete file has been uploaded. We specify this | |
| 1165 # header to prevent a race condition where a destination file may | |
| 1166 # be created after the first check and before the file is fully | |
| 1167 # uploaded. | |
| 1168 # In order to save on unnecessary uploads/downloads we perform both | |
| 1169 # checks. However, this may come at the cost of additional HTTP calls. | |
| 1170 if dst_uri.exists(headers): | |
| 1171 if not self.quiet: | |
| 1172 self.THREADED_LOGGER.info('Skipping existing item: %s' % | |
| 1173 dst_uri.uri) | |
| 1174 return (0, 0, None) | |
| 1175 if dst_uri.is_cloud_uri() and dst_uri.scheme == 'gs': | |
| 1176 headers['x-goog-if-generation-match'] = '0' | |
| 1177 | |
| 1178 if src_uri.is_cloud_uri() and dst_uri.is_cloud_uri(): | |
| 1179 if src_uri.scheme == dst_uri.scheme and not self.daisy_chain: | |
| 1180 return self._CopyObjToObjInTheCloud(src_key, src_uri, dst_uri, headers) | |
| 1181 else: | |
| 1182 return self._CopyObjToObjDaisyChainMode(src_key, src_uri, dst_uri, | |
| 1183 headers) | |
| 1184 elif src_uri.is_file_uri() and dst_uri.is_cloud_uri(): | |
| 1185 return self._UploadFileToObject(src_key, src_uri, dst_uri, headers) | |
| 1186 elif src_uri.is_cloud_uri() and dst_uri.is_file_uri(): | |
| 1187 return self._DownloadObjectToFile(src_key, src_uri, dst_uri, headers) | |
| 1188 elif src_uri.is_file_uri() and dst_uri.is_file_uri(): | |
| 1189 return self._CopyFileToFile(src_key, src_uri, dst_uri, headers) | |
| 1190 else: | |
| 1191 raise CommandException('Unexpected src/dest case') | |
| 1192 | |
| 1193 def _ExpandDstUri(self, dst_uri_str): | |
| 1194 """ | |
| 1195 Expands wildcard if present in dst_uri_str. | |
| 1196 | |
| 1197 Args: | |
| 1198 dst_uri_str: String representation of requested dst_uri. | |
| 1199 | |
| 1200 Returns: | |
| 1201 (exp_dst_uri, have_existing_dst_container) | |
| 1202 where have_existing_dst_container is a bool indicating whether | |
| 1203 exp_dst_uri names an existing directory, bucket, or bucket subdirectory. | |
| 1204 | |
| 1205 Raises: | |
| 1206 CommandException: if dst_uri_str matched more than 1 URI. | |
| 1207 """ | |
| 1208 dst_uri = self.suri_builder.StorageUri(dst_uri_str) | |
| 1209 | |
| 1210 # Handle wildcarded dst_uri case. | |
| 1211 if ContainsWildcard(dst_uri): | |
| 1212 blr_expansion = list(self.WildcardIterator(dst_uri)) | |
| 1213 if len(blr_expansion) != 1: | |
| 1214 raise CommandException('Destination (%s) must match exactly 1 URI' % | |
| 1215 dst_uri_str) | |
| 1216 blr = blr_expansion[0] | |
| 1217 uri = blr.GetUri() | |
| 1218 if uri.is_cloud_uri(): | |
| 1219 return (uri, uri.names_bucket() or blr.HasPrefix() | |
| 1220 or blr.GetKey().endswith('/')) | |
| 1221 else: | |
| 1222 return (uri, uri.names_directory()) | |
| 1223 | |
| 1224 # Handle non-wildcarded dst_uri: | |
| 1225 if dst_uri.is_file_uri(): | |
| 1226 return (dst_uri, dst_uri.names_directory()) | |
| 1227 if dst_uri.names_bucket(): | |
| 1228 return (dst_uri, True) | |
| 1229 # For object URIs check 3 cases: (a) if the name ends with '/' treat as a | |
| 1230 # subdir; else, perform a wildcard expansion with dst_uri + "*" and then | |
| 1231 # find if (b) there's a Prefix matching dst_uri, or (c) name is of form | |
| 1232 # dir_$folder$ (and in both these cases also treat dir as a subdir). | |
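| # For example (hypothetical names): if the object gs://bucket/dir_$folder$ | |
| # exists, a destination of gs://bucket/dir is treated as an existing | |
| # subdirectory per case (c). | |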
| 1233 if dst_uri.is_cloud_uri() and dst_uri_str.endswith('/'): | |
| 1234 return (dst_uri, True) | |
| 1235 blr_expansion = list(self.WildcardIterator( | |
| 1236 '%s*' % dst_uri_str.rstrip(dst_uri.delim))) | |
| 1237 for blr in blr_expansion: | |
| 1238 if blr.GetRStrippedUriString().endswith('_$folder$'): | |
| 1239 return (dst_uri, True) | |
| 1240 if blr.GetRStrippedUriString() == dst_uri_str.rstrip(dst_uri.delim): | |
| 1241 return (dst_uri, blr.HasPrefix()) | |
| 1242 return (dst_uri, False) | |
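| # Usage sketch (hypothetical URIs): if gs://bucket/subdir is an existing | |
| # bucket subdirectory, | |
| #   self._ExpandDstUri('gs://bucket/subdir') -> (<subdir URI>, True) | |
| # while for an object name matching no existing prefix or object, | |
| #   self._ExpandDstUri('gs://bucket/newobj') -> (<newobj URI>, False) | |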
| 1243 | |
| 1244 def _ConstructDstUri(self, src_uri, exp_src_uri, | |
| 1245 src_uri_names_container, src_uri_expands_to_multi, | |
| 1246 have_multiple_srcs, exp_dst_uri, | |
| 1247 have_existing_dest_subdir): | |
| 1248 """ | |
| 1249 Constructs the destination URI for a given exp_src_uri/exp_dst_uri pair, | |
| 1250 using context-dependent naming rules that mimic Linux cp and mv behavior. | |
| 1251 | |
| 1252 Args: | |
| 1253 src_uri: src_uri to be copied. | |
| 1254 exp_src_uri: Single StorageUri from wildcard expansion of src_uri. | |
| 1255 src_uri_names_container: True if src_uri names a container (including the | |
| 1256 case of a wildcard-named bucket subdir (like gs://bucket/abc, | |
| 1257 where gs://bucket/abc/* matched some objects). Note that this is | |
| 1258 additional semantics that src_uri.names_container() doesn't understand | |
| 1259 because the latter only understands StorageUris, not wildcards. | |
| 1260 src_uri_expands_to_multi: True if src_uri expanded to multiple URIs. | |
| 1261 have_multiple_srcs: True if this is a multi-source request. This can be | |
| 1262 true if src_uri wildcard-expanded to multiple URIs or if there were | |
| 1263 multiple source URIs in the request. | |
| 1264 exp_dst_uri: the expanded StorageUri requested for the cp destination. | |
| 1265 Final written path is constructed from this plus a context-dependent | |
| 1266 variant of src_uri. | |
| 1267 have_existing_dest_subdir: bool indicator whether dest is an existing | |
| 1268 subdirectory. | |
| 1269 | |
| 1270 Returns: | |
| 1271 StorageUri to use for copy. | |
| 1272 | |
| 1273 Raises: | |
| 1274 CommandException if no destination object name is specified and the | |
| 1275 source is a stream. | |
| 1276 """ | |
| 1277 if self._ShouldTreatDstUriAsSingleton( | |
| 1278 have_multiple_srcs, have_existing_dest_subdir, exp_dst_uri): | |
| 1279 # We're copying one file or object to one file or object. | |
| 1280 return exp_dst_uri | |
| 1281 | |
| 1282 if exp_src_uri.is_stream(): | |
| 1283 if exp_dst_uri.names_container(): | |
| 1284 raise CommandException('Destination object name needed when ' | |
| 1285 'source is a stream') | |
| 1286 return exp_dst_uri | |
| 1287 | |
| 1288 if not self.recursion_requested and not have_multiple_srcs: | |
| 1289 # We're copying one file or object to a subdirectory. Append the final | |
| 1290 # component of exp_src_uri to exp_dst_uri. | |
| 1291 src_final_comp = exp_src_uri.object_name.rpartition(src_uri.delim)[-1] | |
| 1292 return self.suri_builder.StorageUri('%s%s%s' % ( | |
| 1293 exp_dst_uri.uri.rstrip(exp_dst_uri.delim), exp_dst_uri.delim, | |
| 1294 src_final_comp)) | |
| 1295 | |
| 1296 # Else we're copying multiple sources to a directory, bucket, or a bucket | |
| 1297 # "sub-directory". | |
| 1298 | |
| 1299 # Ensure exp_dst_uri ends in delim char if we're doing a multi-src copy or | |
| 1300 # a copy to a directory. (The check for copying to a directory needs | |
| 1301 # special-case handling so that the command: | |
| 1302 # gsutil cp gs://bucket/obj dir | |
| 1303 # will turn into file://dir/ instead of file://dir -- the latter would cause | |
| 1304 # the file "dirobj" to be created.) | |
| 1305 # Note: need to check have_multiple_srcs or src_uri.names_container() | |
| 1306 # because src_uri could be a bucket containing a single object, named | |
| 1307 # as gs://bucket. | |
| 1308 if ((have_multiple_srcs or src_uri.names_container() | |
| 1309 or os.path.isdir(exp_dst_uri.object_name)) | |
| 1310 and not exp_dst_uri.uri.endswith(exp_dst_uri.delim)): | |
| 1311 exp_dst_uri = exp_dst_uri.clone_replace_name( | |
| 1312 '%s%s' % (exp_dst_uri.object_name, exp_dst_uri.delim) | |
| 1313 ) | |
| 1314 | |
| 1315 # Making naming behavior match how things work with local Linux cp and mv | |
| 1316 # operations depends on many factors, including whether the destination is a | |
| 1317 # container, the plurality of the source(s), and whether the mv command is | |
| 1318 # being used: | |
| 1319 # 1. For the "mv" command that specifies a non-existent destination subdir, | |
| 1320 # renaming should occur at the level of the src subdir, vs appending that | |
| 1321 # subdir beneath the dst subdir like is done for copying. For example: | |
| 1322 # gsutil rm -R gs://bucket | |
| 1323 # gsutil cp -R dir1 gs://bucket | |
| 1324 # gsutil cp -R dir2 gs://bucket/subdir1 | |
| 1325 # gsutil mv gs://bucket/subdir1 gs://bucket/subdir2 | |
| 1326 # would (if using cp naming behavior) end up with paths like: | |
| 1327 # gs://bucket/subdir2/subdir1/dir2/.svn/all-wcprops | |
| 1328 # whereas mv naming behavior should result in: | |
| 1329 # gs://bucket/subdir2/dir2/.svn/all-wcprops | |
| 1330 # 2. Copying from directories, buckets, or bucket subdirs should result in | |
| 1331 # objects/files mirroring the source directory hierarchy. For example: | |
| 1332 # gsutil cp dir1/dir2 gs://bucket | |
| 1333 # should create the object gs://bucket/dir2/file2 (assuming dir1/dir2 | |
| 1334 # contains file2). | |
| 1335 # To be consistent with Linux cp behavior, there's one more wrinkle when | |
| 1336 # working with subdirs: The resulting object names depend on whether the | |
| 1337 # destination subdirectory exists. For example, if gs://bucket/subdir | |
| 1338 # exists, the command: | |
| 1339 # gsutil cp -R dir1/dir2 gs://bucket/subdir | |
| 1340 # should create objects named like gs://bucket/subdir/dir2/a/b/c. In | |
| 1341 # contrast, if gs://bucket/subdir does not exist, this same command | |
| 1342 # should create objects named like gs://bucket/subdir/a/b/c. | |
| 1343 # 3. Copying individual files or objects to dirs, buckets or bucket subdirs | |
| 1344 # should result in objects/files named by the final source file name | |
| 1345 # component. Example: | |
| 1346 # gsutil cp dir1/*.txt gs://bucket | |
| 1347 # should create the objects gs://bucket/f1.txt and gs://bucket/f2.txt, | |
| 1348 # assuming dir1 contains f1.txt and f2.txt. | |
| 1349 | |
| 1350 if (self.perform_mv and self.recursion_requested | |
| 1351 and src_uri_expands_to_multi and not have_existing_dest_subdir): | |
| 1352 # Case 1. Handle naming rules for bucket subdir mv. Here we want to | |
| 1353 # line up the src_uri against its expansion, to find the base to build | |
| 1354 # the new name. For example, running the command: | |
| 1355 # gsutil mv gs://bucket/abcd gs://bucket/xyz | |
| 1356 # when processing exp_src_uri=gs://bucket/abcd/123 | |
| 1357 # exp_src_uri_tail should become /123 | |
| 1358 # Note: mv.py code disallows wildcard specification of source URI. | |
| 1359 exp_src_uri_tail = exp_src_uri.uri[len(src_uri.uri):] | |
| 1360 dst_key_name = '%s/%s' % (exp_dst_uri.object_name.rstrip('/'), | |
| 1361 exp_src_uri_tail.strip('/')) | |
| 1362 return exp_dst_uri.clone_replace_name(dst_key_name) | |
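| # Worked example of the slice above (hypothetical values): for | |
| #   src_uri.uri     == 'gs://bucket/abcd' | |
| #   exp_src_uri.uri == 'gs://bucket/abcd/123' | |
| #   exp_dst_uri     == gs://bucket/xyz | |
| # exp_src_uri_tail is '/123' and dst_key_name becomes 'xyz/123'. | |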
| 1363 | |
| 1364 if src_uri_names_container and not exp_dst_uri.names_file(): | |
| 1365 # Case 2. Build dst_key_name from subpath of exp_src_uri past | |
| 1366 # where src_uri ends. For example, for src_uri=gs://bucket/ and | |
| 1367 # exp_src_uri=gs://bucket/src_subdir/obj, dst_key_name should be | |
| 1368 # src_subdir/obj. | |
| 1369 src_uri_path_sans_final_dir = _GetPathBeforeFinalDir(src_uri) | |
| 1370 dst_key_name = exp_src_uri.uri[ | |
| 1371 len(src_uri_path_sans_final_dir):].lstrip(src_uri.delim) | |
| 1372 # Handle case where dst_uri is a non-existent subdir. | |
| 1373 if not have_existing_dest_subdir: | |
| 1374 dst_key_name = dst_key_name.partition(src_uri.delim)[-1] | |
| 1375 # Handle special case where src_uri was a directory named with '.' or | |
| 1376 # './', so that running a command like: | |
| 1377 # gsutil cp -r . gs://dest | |
| 1378 # will produce obj names of the form gs://dest/abc instead of | |
| 1379 # gs://dest/./abc. | |
| 1380 if dst_key_name.startswith('.%s' % os.sep): | |
| 1381 dst_key_name = dst_key_name[2:] | |
| 1382 | |
| 1383 else: | |
| 1384 # Case 3. | |
| 1385 dst_key_name = exp_src_uri.object_name.rpartition(src_uri.delim)[-1] | |
| 1386 | |
| 1387 if (exp_dst_uri.is_file_uri() | |
| 1388 or self._ShouldTreatDstUriAsBucketSubDir( | |
| 1389 have_multiple_srcs, exp_dst_uri, have_existing_dest_subdir)): | |
| 1390 if exp_dst_uri.object_name.endswith(exp_dst_uri.delim): | |
| 1391 dst_key_name = '%s%s%s' % ( | |
| 1392 exp_dst_uri.object_name.rstrip(exp_dst_uri.delim), | |
| 1393 exp_dst_uri.delim, dst_key_name) | |
| 1394 else: | |
| 1395 delim = exp_dst_uri.delim if exp_dst_uri.object_name else '' | |
| 1396 dst_key_name = '%s%s%s' % (exp_dst_uri.object_name, delim, dst_key_name) | |
| 1397 | |
| 1398 return exp_dst_uri.clone_replace_name(dst_key_name) | |
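| # Worked example of the Case 2 path above (hypothetical values): for | |
| #   src_uri     = gs://bucket/src_subdir | |
| #   exp_src_uri = gs://bucket/src_subdir/obj | |
| # _GetPathBeforeFinalDir() returns 'gs://bucket', so dst_key_name starts | |
| # as 'src_subdir/obj' and, if the destination subdir doesn't already | |
| # exist, is trimmed to 'obj'. | |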
| 1399 | |
| 1400 def _FixWindowsNaming(self, src_uri, dst_uri): | |
| 1401 """ | |
| 1402 Rewrites the destination URI built by _ConstructDstUri() to translate | |
| 1403 Windows pathnames to cloud pathnames if needed. | |
| 1404 | |
| 1405 Args: | |
| 1406 src_uri: Source URI to be copied. | |
| 1407 dst_uri: The destination URI built by _ConstructDstUri(). | |
| 1408 | |
| 1409 Returns: | |
| 1410 StorageUri to use for copy. | |
| 1411 """ | |
| 1412 if (src_uri.is_file_uri() and src_uri.delim == '\\' | |
| 1413 and dst_uri.is_cloud_uri()): | |
| 1414 trans_uri_str = re.sub(r'\\', '/', dst_uri.uri) | |
| 1415 dst_uri = self.suri_builder.StorageUri(trans_uri_str) | |
| 1416 return dst_uri | |
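| # Example (hypothetical paths): copying src file://C:\data\file.txt where | |
| # the constructed destination came out as gs://bucket/data\file.txt | |
| # rewrites it to gs://bucket/data/file.txt. | |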
| 1417 | |
| 1418 # Command entry point. | |
| 1419 def RunCommand(self): | |
| 1420 | |
| 1421 # Inner funcs. | |
| 1422 def _CopyExceptionHandler(e): | |
| 1423 """Simple exception handler to allow post-completion status.""" | |
| 1424 self.THREADED_LOGGER.error(str(e)) | |
| 1425 self.copy_failure_count += 1 | |
| 1426 | |
| 1427 def _CopyFunc(name_expansion_result): | |
| 1428 """Worker function for performing the actual copy (and rm, for mv).""" | |
| 1429 if self.perform_mv: | |
| 1430 cmd_name = 'mv' | |
| 1431 else: | |
| 1432 cmd_name = self.command_name | |
| 1433 src_uri = self.suri_builder.StorageUri( | |
| 1434 name_expansion_result.GetSrcUriStr()) | |
| 1435 exp_src_uri = self.suri_builder.StorageUri( | |
| 1436 name_expansion_result.GetExpandedUriStr()) | |
| 1437 src_uri_names_container = name_expansion_result.NamesContainer() | |
| 1438 src_uri_expands_to_multi = name_expansion_result.SrcUriExpandsToMulti() | |
| 1439 have_multiple_srcs = name_expansion_result.IsMultiSrcRequest() | |
| 1440 have_existing_dest_subdir = ( | |
| 1441 name_expansion_result.HaveExistingDstContainer()) | |
| 1442 | |
| 1443 if src_uri.names_provider(): | |
| 1444 raise CommandException( | |
| 1445 'The %s command does not allow provider-only source URIs (%s)' % | |
| 1446 (cmd_name, src_uri)) | |
| 1447 if have_multiple_srcs: | |
| 1448 self._InsistDstUriNamesContainer(exp_dst_uri, | |
| 1449 have_existing_dst_container, | |
| 1450 cmd_name) | |
| 1451 | |
| 1452 if self.perform_mv: | |
| 1453 if name_expansion_result.NamesContainer(): | |
| 1454 # Use recursion_requested when performing name expansion for the | |
| 1455 # directory mv case so we can determine if any of the source URIs are | |
| 1456 # directories (and then use cp -R and rm -R to perform the move, to | |
| 1457 # match the behavior of Linux mv, which moves all the contained files | |
| 1458 # when moving a directory). | |
| 1459 self.recursion_requested = True | |
| 1460 # Disallow wildcard src URIs when moving directories, as supporting it | |
| 1461 # would make the name transformation too complex and would also be | |
| 1462 # dangerous (e.g., someone could accidentally move many objects to the | |
| 1463 # wrong name, or accidentally overwrite many objects). | |
| 1464 if ContainsWildcard(src_uri): | |
| 1465 raise CommandException('The mv command disallows naming source ' | |
| 1466 'directories using wildcards') | |
| 1467 | |
| 1468 if (exp_dst_uri.is_file_uri() | |
| 1469 and not os.path.exists(exp_dst_uri.object_name) | |
| 1470 and have_multiple_srcs): | |
| 1471 os.makedirs(exp_dst_uri.object_name) | |
| 1472 | |
| 1473 dst_uri = self._ConstructDstUri(src_uri, exp_src_uri, | |
| 1474 src_uri_names_container, | |
| 1475 src_uri_expands_to_multi, | |
| 1476 have_multiple_srcs, exp_dst_uri, | |
| 1477 have_existing_dest_subdir) | |
| 1478 dst_uri = self._FixWindowsNaming(src_uri, dst_uri) | |
| 1479 | |
| 1480 self._CheckForDirFileConflict(exp_src_uri, dst_uri) | |
| 1481 if self._SrcDstSame(exp_src_uri, dst_uri): | |
| 1482 raise CommandException('%s: "%s" and "%s" are the same file - ' | |
| 1483 'abort.' % (cmd_name, exp_src_uri, dst_uri)) | |
| 1484 | |
| 1485 if dst_uri.is_cloud_uri() and dst_uri.is_version_specific: | |
| 1486 raise CommandException('%s: a version-specific URI\n(%s)\ncannot be ' | |
| 1487 'the destination for gsutil cp - abort.' | |
| 1488 % (cmd_name, dst_uri)) | |
| 1489 | |
| 1490 elapsed_time = bytes_transferred = 0 | |
| result_uri = None | |
| 1491 try: | |
| 1492 (elapsed_time, bytes_transferred, result_uri) = ( | |
| 1493 self._PerformCopy(exp_src_uri, dst_uri)) | |
| 1494 except Exception, e: | |
| 1495 if self._IsNoClobberServerException(e): | |
| 1496 if not self.quiet: | |
| 1497 self.THREADED_LOGGER.info('Rejected (noclobber): %s' % dst_uri.uri) | |
| 1498 elif self.continue_on_error: | |
| 1499 if not self.quiet: | |
| 1500 self.THREADED_LOGGER.error('Error copying %s: %s' % (src_uri.uri, | |
| 1501 str(e))) | |
| 1502 self.copy_failure_count += 1 | |
| 1503 else: | |
| 1504 raise | |
| # result_uri stays None if the copy above was skipped or failed. | |
| 1505 if self.print_ver and result_uri is not None: | |
| 1506 # Some cases don't return a version-specific URI (e.g., if destination | |
| 1507 # is a file). | |
| 1508 if hasattr(result_uri, 'version_specific_uri'): | |
| 1509 self.THREADED_LOGGER.info('Created: %s' % | |
| 1510 result_uri.version_specific_uri) | |
| 1511 else: | |
| 1512 self.THREADED_LOGGER.info('Created: %s' % result_uri.uri) | |
| 1513 | |
| 1514 # TODO: If we ever use -n (noclobber) with -M (move) (not possible today | |
| 1515 # since we call copy internally from move and don't specify the -n flag) | |
| 1516 # we'll need to only remove the source when we have not skipped the | |
| 1517 # destination. | |
| 1518 if self.perform_mv: | |
| 1519 if not self.quiet: | |
| 1520 self.THREADED_LOGGER.info('Removing %s...', exp_src_uri) | |
| 1521 exp_src_uri.delete_key(validate=False, headers=self.headers) | |
| 1522 stats_lock.acquire() | |
| 1523 self.total_elapsed_time += elapsed_time | |
| 1524 self.total_bytes_transferred += bytes_transferred | |
| 1525 stats_lock.release() | |
| 1526 | |
| 1527 # Start of RunCommand code. | |
| 1528 self._ParseArgs() | |
| 1529 | |
| 1530 self.total_elapsed_time = self.total_bytes_transferred = 0 | |
| 1531 if self.args[-1] == '-' or self.args[-1] == 'file://-': | |
| 1532 self._HandleStreamingDownload() | |
| 1533 return 0 | |
| 1534 | |
| 1535 if self.read_args_from_stdin: | |
| 1536 if len(self.args) != 1: | |
| 1537 raise CommandException('Source URIs cannot be specified with -I option') | |
| 1538 uri_strs = self._StdinIterator() | |
| 1539 else: | |
| 1540 if len(self.args) < 2: | |
| 1541 raise CommandException('Wrong number of arguments for "cp" command.') | |
| 1542 uri_strs = self.args[:-1] | |
| 1543 | |
| 1544 (exp_dst_uri, have_existing_dst_container) = self._ExpandDstUri( | |
| 1545 self.args[-1]) | |
| 1546 name_expansion_iterator = NameExpansionIterator( | |
| 1547 self.command_name, self.proj_id_handler, self.headers, self.debug, | |
| 1548 self.bucket_storage_uri_class, uri_strs, | |
| 1549 self.recursion_requested or self.perform_mv, | |
| 1550 have_existing_dst_container) | |
| 1551 | |
| 1552 # Use a lock to ensure accurate statistics in the face of | |
| 1553 # multi-threading/multi-processing. | |
| 1554 stats_lock = threading.Lock() | |
| 1555 | |
| 1556 # Tracks if any copies failed. | |
| 1557 self.copy_failure_count = 0 | |
| 1558 | |
| 1559 # Start the clock. | |
| 1560 start_time = time.time() | |
| 1561 | |
| 1562 # Tuple of attributes to share/manage across multiple processes in | |
| 1563 # parallel (-m) mode. | |
| 1564 shared_attrs = ('copy_failure_count', 'total_bytes_transferred') | |
| 1565 | |
| 1566 # Perform copy requests in parallel (-m) mode, if requested, using | |
| 1567 # configured number of parallel processes and threads. Otherwise, | |
| 1568 # perform requests with sequential function calls in current process. | |
| 1569 self.Apply(_CopyFunc, name_expansion_iterator, _CopyExceptionHandler, | |
| 1570 shared_attrs) | |
| 1571 if self.debug: | |
| 1572 print 'total_bytes_transferred: ' + str(self.total_bytes_transferred) | |
| 1573 | |
| 1574 end_time = time.time() | |
| 1575 self.total_elapsed_time = end_time - start_time | |
| 1576 | |
| 1577 # Sometimes, particularly when running unit tests, the total elapsed time | |
| 1578 # is really small. On Windows, the timer resolution is too coarse and | |
| 1579 # can cause total_elapsed_time to be zero. | |
| 1580 try: | |
| 1581 float(self.total_bytes_transferred) / float(self.total_elapsed_time) | |
| 1582 except ZeroDivisionError: | |
| 1583 self.total_elapsed_time = 0.01 | |
| 1584 | |
| 1585 self.total_bytes_per_second = (float(self.total_bytes_transferred) / | |
| 1586 float(self.total_elapsed_time)) | |
| 1587 | |
| 1588 if self.debug == 3: | |
| 1589 # Note that this only counts the actual GET and PUT bytes for the copy | |
| 1590 # - not any transfers for doing wildcard expansion, the initial HEAD | |
| 1591 # request boto performs when doing a bucket.get_key() operation, etc. | |
| 1592 if self.total_bytes_transferred != 0: | |
| 1593 self.THREADED_LOGGER.info( | |
| 1594 'Total bytes copied=%d, total elapsed time=%5.3f secs (%sps)', | |
| 1595 self.total_bytes_transferred, self.total_elapsed_time, | |
| 1596 MakeHumanReadable(self.total_bytes_per_second)) | |
| 1597 if self.copy_failure_count: | |
| 1598 plural_str = '' | |
| 1599 if self.copy_failure_count > 1: | |
| 1600 plural_str = 's' | |
| 1601 raise CommandException('%d file%s/object%s could not be transferred.' % ( | |
| 1602 self.copy_failure_count, plural_str, plural_str)) | |
| 1603 | |
| 1604 return 0 | |
| 1605 | |
| 1606 def _ParseArgs(self): | |
| 1607 self.perform_mv = False | |
| 1608 self.exclude_symlinks = False | |
| 1609 self.quiet = False | |
| 1610 self.no_clobber = False | |
| 1611 self.continue_on_error = False | |
| 1612 self.daisy_chain = False | |
| 1613 self.read_args_from_stdin = False | |
| 1614 self.print_ver = False | |
| 1615 # self.recursion_requested initialized in command.py (so can be checked | |
| 1616 # in parent class for all commands). | |
| 1617 if self.sub_opts: | |
| 1618 for o, unused_a in self.sub_opts: | |
| 1619 if o == '-c': | |
| 1620 self.continue_on_error = True | |
| 1621 elif o == '-D': | |
| 1622 self.daisy_chain = True | |
| 1623 elif o == '-e': | |
| 1624 self.exclude_symlinks = True | |
| 1625 elif o == '-I': | |
| 1626 self.read_args_from_stdin = True | |
| 1627 elif o == '-M': | |
| 1628 # Note that we signal to the cp command to perform a move (copy | |
| 1629 # followed by remove) and use directory-move naming rules by passing | |
| 1630 # the undocumented (for internal use) -M option when running the cp | |
| 1631 # command from mv.py. | |
| 1632 self.perform_mv = True | |
| 1633 elif o == '-n': | |
| 1634 self.no_clobber = True | |
| 1635 elif o == '-q': | |
| 1636 self.quiet = True | |
| 1637 elif o == '-r' or o == '-R': | |
| 1638 self.recursion_requested = True | |
| 1639 elif o == '-v': | |
| 1640 self.print_ver = True | |
| 1641 | |
| 1642 def _HandleStreamingDownload(self): | |
| 1643 # Destination is <STDOUT>. Manipulate sys.stdout so as to redirect all | |
| 1644 # debug messages to <STDERR>. | |
| 1645 stdout_fp = sys.stdout | |
| 1646 sys.stdout = sys.stderr | |
| 1647 did_some_work = False | |
| 1648 for uri_str in self.args[:-1]: | |
| 1649 for uri in self.WildcardIterator(uri_str).IterUris(): | |
| 1650 did_some_work = True | |
| 1651 key = uri.get_key(False, self.headers) | |
| 1652 (elapsed_time, bytes_transferred) = self._PerformDownloadToStream( | |
| 1653 key, uri, stdout_fp, self.headers) | |
| 1654 self.total_elapsed_time += elapsed_time | |
| 1655 self.total_bytes_transferred += bytes_transferred | |
| 1656 if not did_some_work: | |
| 1657 raise CommandException('No URIs matched') | |
| 1658 if self.debug == 3: | |
| 1659 if self.total_bytes_transferred != 0: | |
| 1660 self.THREADED_LOGGER.info( | |
| 1661 'Total bytes copied=%d, total elapsed time=%5.3f secs (%sps)', | |
| 1662 self.total_bytes_transferred, self.total_elapsed_time, | |
| 1663 MakeHumanReadable(float(self.total_bytes_transferred) / | |
| 1664 float(self.total_elapsed_time))) | |
| 1665 | |
| 1666 def _StdinIterator(self): | |
| 1667 """A generator function that returns lines from stdin.""" | |
| 1668 for line in sys.stdin: | |
| 1669 # Strip trailing whitespace (including CRLF). | |
| 1670 yield line.rstrip() | |
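| # Usage sketch for the -I flag (hypothetical command): | |
| #   some_program | gsutil cp -I gs://bucket | |
| # where each line read from stdin is treated as a separate source URI. | |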
| 1671 | |
| 1672 def _SrcDstSame(self, src_uri, dst_uri): | |
| 1673 """Checks if src_uri and dst_uri represent the same object or file. | |
| 1674 | |
| 1675 We don't handle anything about hard or symbolic links. | |
| 1676 | |
| 1677 Args: | |
| 1678 src_uri: Source StorageUri. | |
| 1679 dst_uri: Destination StorageUri. | |
| 1680 | |
| 1681 Returns: | |
| 1682 Bool indicator. | |
| 1683 """ | |
| 1684 if src_uri.is_file_uri() and dst_uri.is_file_uri(): | |
| 1685 # Translate a/b/./c to a/b/c, so src=dst comparison below works. | |
| 1686 new_src_path = os.path.normpath(src_uri.object_name) | |
| 1687 new_dst_path = os.path.normpath(dst_uri.object_name) | |
| 1688 return (src_uri.clone_replace_name(new_src_path).uri == | |
| 1689 dst_uri.clone_replace_name(new_dst_path).uri) | |
| 1690 else: | |
| 1691 return (src_uri.uri == dst_uri.uri and | |
| 1692 src_uri.generation == dst_uri.generation and | |
| 1693 src_uri.version_id == dst_uri.version_id) | |
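| # For example, os.path.normpath('dir/./f') == 'dir/f', so a copy from | |
| # dir/./f to dir/f is correctly detected as a same-file copy. | |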
| 1694 | |
| 1695 def _ShouldTreatDstUriAsBucketSubDir(self, have_multiple_srcs, dst_uri, | |
| 1696 have_existing_dest_subdir): | |
| 1697 """ | |
| 1698 Checks whether dst_uri should be treated as a bucket "sub-directory". The | |
| 1699 decision about whether something constitutes a bucket "sub-directory" | |
| 1700 depends on whether there are multiple sources in this request and whether | |
| 1701 there is an existing bucket subdirectory. For example, when running the | |
| 1702 command: | |
| 1703 gsutil cp file gs://bucket/abc | |
| 1704 if there's no existing gs://bucket/abc bucket subdirectory we should copy | |
| 1705 file to the object gs://bucket/abc. In contrast, if | |
| 1706 there's an existing gs://bucket/abc bucket subdirectory we should copy | |
| 1707 file to gs://bucket/abc/file. And regardless of whether gs://bucket/abc | |
| 1708 exists, when running the command: | |
| 1709 gsutil cp file1 file2 gs://bucket/abc | |
| 1710 we should copy file1 to gs://bucket/abc/file1 (and similarly for file2). | |
| 1711 | |
| 1712 Note that we don't disallow naming a bucket "sub-directory" where there's | |
| 1713 already an object at that URI. For example it's legitimate (albeit | |
| 1714 confusing) to have an object called gs://bucket/dir and | |
| 1715 then run the command | |
| 1716 gsutil cp file1 file2 gs://bucket/dir | |
| 1717 Doing so will end up with objects gs://bucket/dir, gs://bucket/dir/file1, | |
| 1718 and gs://bucket/dir/file2. | |
| 1719 | |
| 1720 Args: | |
| 1721 have_multiple_srcs: Bool indicator of whether this is a multi-source | |
| 1722 operation. | |
| 1723 dst_uri: StorageUri to check. | |
| 1724 have_existing_dest_subdir: bool indicator whether dest is an existing | |
| 1725 subdirectory. | |
| 1726 | |
| 1727 Returns: | |
| 1728 bool indicator. | |
| 1729 """ | |
| 1730 return ((have_multiple_srcs and dst_uri.is_cloud_uri()) | |
| 1731 or (have_existing_dest_subdir)) | |
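| # Sketch of the rule (hypothetical arguments): the destination is treated | |
| # as a subdir when it already exists as one, or when several sources are | |
| # copied to a single cloud URI: | |
| #   _ShouldTreatDstUriAsBucketSubDir(True, <gs://bucket/abc>, False)  -> True | |
| #   _ShouldTreatDstUriAsBucketSubDir(False, <gs://bucket/abc>, False) -> False | |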
| 1732 | |
| 1733 def _ShouldTreatDstUriAsSingleton(self, have_multiple_srcs, | |
| 1734 have_existing_dest_subdir, dst_uri): | |
| 1735 """ | |
| 1736 Checks that dst_uri names a singleton (file or object) after | |
| 1737 dir/wildcard expansion. The decision is more nuanced than simply | |
| 1738 dst_uri.names_singleton() because of the possibility that an object path | |
| 1739 might name a bucket sub-directory. | |
| 1740 | |
| 1741 Args: | |
| 1742 have_multiple_srcs: Bool indicator of whether this is a multi-source | |
| 1743 operation. | |
| 1744 have_existing_dest_subdir: bool indicator whether dest is an existing | |
| 1745 subdirectory. | |
| 1746 dst_uri: StorageUri to check. | |
| 1747 | |
| 1748 Returns: | |
| 1749 bool indicator. | |
| 1750 """ | |
| 1751 if have_multiple_srcs: | |
| 1752 # Only a file meets the criteria in this case. | |
| 1753 return dst_uri.names_file() | |
| 1754 return not have_existing_dest_subdir and dst_uri.names_singleton() | |
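| # E.g. (hypothetical arguments): a single source copied to gs://bucket/obj | |
| # with no existing subdir there is a singleton copy: | |
| #   _ShouldTreatDstUriAsSingleton(False, False, <gs://bucket/obj>) -> True | |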
| 1755 | |
| 1756 def _IsNoClobberServerException(self, e): | |
| 1757 """ | |
| 1758 Checks whether the server rejected the copy because it would have | |
| 1759 clobbered a file we asked (via the precondition header) not to clobber. | |
| 1760 | |
| 1761 Args: | |
| 1762 e: The Exception that was generated by a failed copy operation | |
| 1763 | |
| 1764 Returns: | |
| 1765 bool indicator - True indicates the copy was rejected because it would | |
| 1766 have clobbered an existing file. | |
| 1767 """ | |
| 1768 return self.no_clobber and ( | |
| 1769 (isinstance(e, GSResponseError) and e.status == 412) or | |
| 1770 (isinstance(e, ResumableUploadException) and 'code 412' in e.message)) | |
| 1771 | |
| 1772 def _GetPathBeforeFinalDir(uri): | |
| 1773 """ | |
| 1774 Returns the part of the path before the final directory component for the | |
| 1775 given URI, handling cases for file system directories, bucket, and bucket | |
| 1776 subdirectories. Example: for gs://bucket/dir/ we'll return 'gs://bucket', | |
| 1777 and for file://dir we'll return file:// | |
| 1778 | |
| 1779 Args: | |
| 1780 uri: StorageUri. | |
| 1781 | |
| 1782 Returns: | |
| 1783 String name of above-described path, sans final path separator. | |
| 1784 """ | |
| 1785 sep = uri.delim | |
| 1786 assert not uri.names_file() | |
| 1787 if uri.names_directory(): | |
| 1788 past_scheme = uri.uri[len('file://'):] | |
| 1789 if past_scheme.find(sep) == -1: | |
| 1790 return 'file://' | |
| 1791 else: | |
| 1792 return 'file://%s' % past_scheme.rstrip(sep).rpartition(sep)[0] | |
| 1793 if uri.names_bucket(): | |
| 1794 return '%s://' % uri.scheme | |
| 1795 # Else it names a bucket subdir. | |
| 1796 return uri.uri.rstrip(sep).rpartition(sep)[0] | |
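| # Sketch of the three branches (hypothetical paths): | |
| #   _GetPathBeforeFinalDir(file://dir1/dir2/)   -> 'file://dir1' | |
| #   _GetPathBeforeFinalDir(gs://bucket)         -> 'gs://' | |
| #   _GetPathBeforeFinalDir(gs://bucket/subdir/) -> 'gs://bucket' | |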
| 1797 | |
| 1798 def _hash_filename(filename): | |
| 1799 """ | |
| 1800 Apply a hash function (SHA1) to shorten the passed file name. The spec | |
| 1801 for the hashed file name is as follows: | |
| 1802 | |
| 1803 TRACKER_<hash>.<trailing> | |
| 1804 | |
| 1805 where hash is a SHA1 hash on the original file name and trailing is | |
| 1806 the last 16 chars from the original file name. Max file name lengths | |
| 1807 vary by operating system so the goal of this function is to ensure | |
| 1808 the hashed version takes fewer than 100 characters. | |
| 1809 | |
| 1810 Args: | |
| 1811 filename: file name to be hashed. | |
| 1812 | |
| 1813 Returns: | |
| 1814 shorter, hashed version of passed file name | |
| 1815 """ | |
| # SHA1 requires byte strings; encode unicode file names as UTF-8. | |
| 1816 if isinstance(filename, unicode): | |
| 1817 filename = filename.encode('utf-8') | |
| 1818 m = hashlib.sha1(filename) | |
| 1819 return "TRACKER_" + m.hexdigest() + '.' + filename[-16:] | |