OLD | NEW |
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 # Copyright 2014 Google Inc. All Rights Reserved. | 2 # Copyright 2014 Google Inc. All Rights Reserved. |
3 # | 3 # |
4 # Licensed under the Apache License, Version 2.0 (the "License"); | 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
5 # you may not use this file except in compliance with the License. | 5 # you may not use this file except in compliance with the License. |
6 # You may obtain a copy of the License at | 6 # You may obtain a copy of the License at |
7 # | 7 # |
8 # http://www.apache.org/licenses/LICENSE-2.0 | 8 # http://www.apache.org/licenses/LICENSE-2.0 |
9 # | 9 # |
10 # Unless required by applicable law or agreed to in writing, software | 10 # Unless required by applicable law or agreed to in writing, software |
(...skipping 13 matching lines...) |
24 import re | 24 import re |
25 import tempfile | 25 import tempfile |
26 import textwrap | 26 import textwrap |
27 import traceback | 27 import traceback |
28 import urllib | 28 import urllib |
29 | 29 |
30 from boto import config | 30 from boto import config |
31 import crcmod | 31 import crcmod |
32 | 32 |
33 from gslib import copy_helper | 33 from gslib import copy_helper |
| 34 from gslib.bucket_listing_ref import BucketListingObject |
34 from gslib.cloud_api import NotFoundException | 35 from gslib.cloud_api import NotFoundException |
35 from gslib.command import Command | 36 from gslib.command import Command |
36 from gslib.command import DummyArgChecker | 37 from gslib.command import DummyArgChecker |
37 from gslib.command_argument import CommandArgument | 38 from gslib.command_argument import CommandArgument |
38 from gslib.copy_helper import CreateCopyHelperOpts | 39 from gslib.copy_helper import CreateCopyHelperOpts |
39 from gslib.copy_helper import SkipUnsupportedObjectError | 40 from gslib.copy_helper import SkipUnsupportedObjectError |
40 from gslib.cs_api_map import ApiSelector | 41 from gslib.cs_api_map import ApiSelector |
41 from gslib.exception import CommandException | 42 from gslib.exception import CommandException |
42 from gslib.hashing_helper import CalculateB64EncodedCrc32cFromContents | 43 from gslib.hashing_helper import CalculateB64EncodedCrc32cFromContents |
43 from gslib.hashing_helper import CalculateB64EncodedMd5FromContents | 44 from gslib.hashing_helper import CalculateB64EncodedMd5FromContents |
(...skipping 202 matching lines...) |
246 you are running a Python library for computing CRC32C, which is much slower | 247 you are running a Python library for computing CRC32C, which is much slower |
247 than using the compiled code. For information on getting a compiled CRC32C | 248 than using the compiled code. For information on getting a compiled CRC32C |
248 implementation, see 'gsutil help crc32c'. | 249 implementation, see 'gsutil help crc32c'. |
249 | 250 |
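Whether this warning fires depends on whether crcmod loaded its compiled C extension. A minimal probe of that state, assuming crcmod's private _usingExtension flag (an implementation detail, not a public API, so treat this as a best-effort sketch):

    import crcmod

    def using_compiled_crcmod():
        # crcmod sets this private flag when its C extension is in use;
        # _usingExtension is not public API, so this is best-effort only.
        nested = getattr(crcmod, 'crcmod', None)
        return bool(nested and getattr(nested, '_usingExtension', False))

    if not using_compiled_crcmod():
        print('CRC32C will be computed in pure Python; see gsutil help crc32c')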
250 | 251 |
251 <B>LIMITATIONS</B> | 252 <B>LIMITATIONS</B> |
252 1. The gsutil rsync command doesn't make the destination object's timestamps | 253 1. The gsutil rsync command doesn't make the destination object's timestamps |
253 match those of the source object (it can't; timestamp setting is not | 254 match those of the source object (it can't; timestamp setting is not |
254 allowed by the GCS API). | 255 allowed by the GCS API). |
255 | 256 |
256 2. The gsutil rsync command ignores versioning, synchronizing only the live | 257 2. The gsutil rsync command considers only the current object generations in |
257 object versions in versioned buckets. | 258 the source and destination buckets when deciding what to copy or delete. If |
| 259 versioning is enabled in the destination bucket, overwriting or deleting |
| 260 objects there creates archived generations, but the command doesn't try |
| 261 to make the archived generations match in the source and destination |
| 262 buckets. |
| 263 |
258 | 264 |
259 | 265 |
260 <B>OPTIONS</B> | 266 <B>OPTIONS</B> |
261 -c Causes the rsync command to compute checksums for files if the | 267 -c Causes the rsync command to compute checksums for files if the |
262 size of source and destination match, and then compare | 268 sizes of the source and destination match, and then compare the |
263 checksums. This option increases local disk I/O and run time | 269 checksums. This option increases local disk I/O and run time |
264 if either src_url or dst_url are on the local file system. | 270 if either src_url or dst_url is on the local file system. |
265 | 271 |
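Conceptually, the -c comparison reduces to the sketch below: sizes first, then streamed CRC32C checksums. This is illustrative only (the helper names are made up); gsutil's real code path goes through CalculateB64EncodedCrc32cFromContents and friends.

    import base64
    import os

    import crcmod

    def b64_crc32c(path):
        # Stream the file through CRC32C and base64-encode the digest,
        # analogous to CalculateB64EncodedCrc32cFromContents.
        crc = crcmod.predefined.Crc('crc-32c')
        with open(path, 'rb') as fp:
            for chunk in iter(lambda: fp.read(8192), b''):
                crc.update(chunk)
        return base64.b64encode(crc.digest())

    def in_sync(src_path, dst_path):
        # Without -c, equal sizes are presumed in sync; -c pays an extra
        # full read of both files to compare checksums as well.
        if os.path.getsize(src_path) != os.path.getsize(dst_path):
            return False
        return b64_crc32c(src_path) == b64_crc32c(dst_path)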
266 -C If an error occurs, continue to attempt to copy the remaining | 272 -C If an error occurs, continue to attempt to copy the remaining |
267 files. If errors occurred, gsutil's exit status will be non-zero | 273 files. If errors occurred, gsutil's exit status will be non-zero |
(...skipping 28 matching lines...) |
296 rsync -p if you want all objects in the destination bucket to | 302 rsync -p if you want all objects in the destination bucket to |
297 end up with the same ACL by setting a default object ACL on that | 303 end up with the same ACL by setting a default object ACL on that |
298 bucket instead of using rsync -p. See 'help gsutil defacl'. | 304 bucket instead of using rsync -p. See 'gsutil help defacl'. |
299 | 305 |
300 -R, -r Causes directories, buckets, and bucket subdirectories to be | 306 -R, -r Causes directories, buckets, and bucket subdirectories to be |
301 synchronized recursively. If you neglect to use this option | 307 synchronized recursively. If you neglect to use this option |
302 gsutil will make only the top-level directory in the source | 308 gsutil will make only the top-level directory in the source |
303 and destination URLs match, skipping any sub-directories. | 309 and destination URLs match, skipping any sub-directories. |
304 | 310 |
305 -U Skip objects with unsupported object types instead of failing. | 311 -U Skip objects with unsupported object types instead of failing. |
306 Unsupported object types are s3 glacier objects. | 312 Unsupported object types are Amazon S3 objects in the GLACIER |
| 313 storage class. |
307 | 314 |
308 -x pattern Causes files/objects matching pattern to be excluded, i.e., any | 315 -x pattern Causes files/objects matching pattern to be excluded, i.e., any |
309 matching files/objects will not be copied or deleted. Note that | 316 matching files/objects will not be copied or deleted. Note that |
310 the pattern is a Python regular expression, not a wildcard (so, | 317 the pattern is a Python regular expression, not a wildcard (so, |
311 matching any string ending in 'abc' would be specified using | 318 matching any string ending in 'abc' would be specified using |
312 '.*abc' rather than '*abc'). Note also that the exclude path is | 319 '.*abc' rather than '*abc'). Note also that the exclude path is |
313 always relative (similar to Unix rsync or tar exclude options). | 320 always relative (similar to Unix rsync or tar exclude options). |
314 For example, if you run the command: | 321 For example, if you run the command: |
315 | 322 |
316 gsutil rsync -x 'data./.*\\.txt' dir gs://my-bucket | 323 gsutil rsync -x 'data./.*\\.txt' dir gs://my-bucket |
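Two details of -x are easy to confirm with plain re (a sketch; the file names are hypothetical): '*abc' is not a valid regular expression at all, and the pattern is matched, anchored at the start, against the path relative to the source URL.

    import re

    # '*abc' has nothing to repeat, so re.compile raises re.error --
    # the same failure the -x parser reports as an invalid exclude filter.
    try:
        re.compile('*abc')
    except re.error:
        print("use '.*abc', not '*abc'")

    # Matching uses pattern.match, so it is anchored at the start of the
    # path relative to the source URL.
    pattern = re.compile(r'data./.*\.txt')
    print(bool(pattern.match('data1/a.txt')))      # True: excluded
    print(bool(pattern.match('other/data1.txt')))  # False: kept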
(...skipping 148 matching lines...) |
465 # futile or could result in data loss - for example: | 472 # futile or could result in data loss - for example: |
466 # gsutil rsync -d gs://non-existent-bucket ./localdir | 473 # gsutil rsync -d gs://non-existent-bucket ./localdir |
467 # would delete files from localdir. | 474 # would delete files from localdir. |
468 cls.logger.error( | 475 cls.logger.error( |
469 'Caught non-retryable exception while listing %s: %s' % | 476 'Caught non-retryable exception while listing %s: %s' % |
470 (base_url_str, e)) | 477 (base_url_str, e)) |
471 cls.non_retryable_listing_failures = 1 | 478 cls.non_retryable_listing_failures = 1 |
472 out_file.close() | 479 out_file.close() |
473 | 480 |
474 | 481 |
| 482 def _LocalDirIterator(base_url): |
| 483 """A generator that yields a BLR for each file in a local directory. |
| 484 |
| 485 We use this function instead of WildcardIterator for listing a local |
| 486 directory without recursion, because the glob.glob implementation called |
| 487 by WildcardIterator skips "dot" files (which we don't want to do when |
| 488 synchronizing to or from a local directory). |
| 489 |
| 490 Args: |
| 491 base_url: URL for the directory over which to iterate. |
| 492 |
| 493 Yields: |
| 494 BucketListingObject for each file in the directory. |
| 495 """ |
| 496 for filename in os.listdir(base_url.object_name): |
| 497 filename = os.path.join(base_url.object_name, filename) |
| 498 if os.path.isfile(filename): |
| 499 yield BucketListingObject(StorageUrlFromString(filename), None) |
| 500 |
| 501 |
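The docstring's rationale is easy to verify: glob-based listings skip "dot" files while os.listdir keeps them. A self-contained sketch (the temporary directory exists only for the demonstration):

    import glob
    import os
    import tempfile

    demo = tempfile.mkdtemp()
    for name in ('a.txt', '.hidden'):
        open(os.path.join(demo, name), 'w').close()

    # glob skips dot files; os.listdir does not. That asymmetry is why
    # _LocalDirIterator walks os.listdir output instead of relying on
    # the glob-backed WildcardIterator for non-recursive local listings.
    print(sorted(glob.glob(os.path.join(demo, '*'))))  # only a.txt
    print(sorted(os.listdir(demo)))                    # ['.hidden', 'a.txt']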
475 def _FieldedListingIterator(cls, gsutil_api, base_url_str, desc): | 502 def _FieldedListingIterator(cls, gsutil_api, base_url_str, desc): |
476 """Iterator over base_url_str formatting output per _BuildTmpOutputLine. | 503 """Iterator over base_url_str formatting output per _BuildTmpOutputLine. |
477 | 504 |
478 Args: | 505 Args: |
479 cls: Command instance. | 506 cls: Command instance. |
480 gsutil_api: gsutil Cloud API instance to use for bucket listing. | 507 gsutil_api: gsutil Cloud API instance to use for bucket listing. |
481 base_url_str: The top-level URL string over which to iterate. | 508 base_url_str: The top-level URL string over which to iterate. |
482 desc: 'source' or 'destination'. | 509 desc: 'source' or 'destination'. |
483 | 510 |
484 Yields: | 511 Yields: |
485 Output line formatted per _BuildTmpOutputLine. | 512 Output line formatted per _BuildTmpOutputLine. |
486 """ | 513 """ |
487 if cls.recursion_requested: | 514 base_url = StorageUrlFromString(base_url_str) |
488 wildcard = '%s/**' % base_url_str.rstrip('/\\') | 515 if base_url.scheme == 'file' and not cls.recursion_requested: |
| 516 iterator = _LocalDirIterator(base_url) |
489 else: | 517 else: |
490 wildcard = '%s/*' % base_url_str.rstrip('/\\') | 518 if cls.recursion_requested: |
| 519 wildcard = '%s/**' % base_url_str.rstrip('/\\') |
| 520 else: |
| 521 wildcard = '%s/*' % base_url_str.rstrip('/\\') |
| 522 iterator = CreateWildcardIterator( |
| 523 wildcard, gsutil_api, debug=cls.debug, |
| 524 project_id=cls.project_id).IterObjects( |
| 525 # Request just the needed fields, to reduce bandwidth usage. |
| 526 bucket_listing_fields=['crc32c', 'md5Hash', 'name', 'size']) |
| 527 |
491 i = 0 | 528 i = 0 |
492 for blr in CreateWildcardIterator( | 529 for blr in iterator: |
493 wildcard, gsutil_api, debug=cls.debug, | |
494 project_id=cls.project_id).IterObjects( | |
495 # Request just the needed fields, to reduce bandwidth usage. | |
496 bucket_listing_fields=['crc32c', 'md5Hash', 'name', 'size']): | |
497 # Various GUI tools (like the GCS web console) create placeholder objects | 530 # Various GUI tools (like the GCS web console) create placeholder objects |
498 # ending with '/' when the user creates an empty directory. Normally these | 531 # ending with '/' when the user creates an empty directory. Normally these |
499 # tools should delete those placeholders once objects have been written | 532 # tools should delete those placeholders once objects have been written |
500 # "under" the directory, but sometimes the placeholders are left around. | 533 # "under" the directory, but sometimes the placeholders are left around. |
501 # We need to filter them out here, otherwise if the user tries to rsync | 534 # We need to filter them out here, otherwise if the user tries to rsync |
502 # from GCS to a local directory it will result in a directory/file | 535 # from GCS to a local directory it will result in a directory/file |
503 # conflict (e.g., trying to download an object called "mydata/" where the | 536 # conflict (e.g., trying to download an object called "mydata/" where the |
504 # local directory "mydata" exists). | 537 # local directory "mydata" exists). |
505 url = blr.storage_url | 538 url = blr.storage_url |
506 if IsCloudSubdirPlaceholder(url, blr=blr): | 539 if IsCloudSubdirPlaceholder(url, blr=blr): |
507 cls.logger.info('Skipping cloud sub-directory placeholder object (%s) ' | 540 # We used to output the message 'Skipping cloud sub-directory placeholder |
508 'because such objects aren\'t needed in (and would ' | 541 # object...' but we no longer do so because it caused customer confusion. |
509 'interfere with) directories in the local file system', | |
510 url) | |
511 continue | 542 continue |
512 if (cls.exclude_symlinks and url.IsFileUrl() | 543 if (cls.exclude_symlinks and url.IsFileUrl() |
513 and os.path.islink(url.object_name)): | 544 and os.path.islink(url.object_name)): |
514 continue | 545 continue |
515 if cls.exclude_pattern: | 546 if cls.exclude_pattern: |
516 str_to_check = url.url_string[len(base_url_str):] | 547 str_to_check = url.url_string[len(base_url_str):] |
517 if str_to_check.startswith(url.delim): | 548 if str_to_check.startswith(url.delim): |
518 str_to_check = str_to_check[1:] | 549 str_to_check = str_to_check[1:] |
519 if cls.exclude_pattern.match(str_to_check): | 550 if cls.exclude_pattern.match(str_to_check): |
520 continue | 551 continue |
(...skipping 502 matching lines...) |
1023 elif o == '-x': | 1054 elif o == '-x': |
1024 if not a: | 1055 if not a: |
1025 raise CommandException('Invalid blank exclude filter') | 1056 raise CommandException('Invalid blank exclude filter') |
1026 try: | 1057 try: |
1027 self.exclude_pattern = re.compile(a) | 1058 self.exclude_pattern = re.compile(a) |
1028 except re.error: | 1059 except re.error: |
1029 raise CommandException('Invalid exclude filter (%s)' % a) | 1060 raise CommandException('Invalid exclude filter (%s)' % a) |
1030 return CreateCopyHelperOpts( | 1061 return CreateCopyHelperOpts( |
1031 preserve_acl=preserve_acl, | 1062 preserve_acl=preserve_acl, |
1032 skip_unsupported_objects=self.skip_unsupported_objects) | 1063 skip_unsupported_objects=self.skip_unsupported_objects) |