OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
4 """ | 4 """ |
5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
6 | 6 |
7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
8 found in the LICENSE file. | 8 found in the LICENSE file. |
9 | 9 |
10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
45 from boto.s3.connection import SubdomainCallingFormat | 45 from boto.s3.connection import SubdomainCallingFormat |
46 from boto.s3.prefix import Prefix | 46 from boto.s3.prefix import Prefix |
47 | 47 |
48 # How many files to upload at once, by default. | 48 # How many files to upload at once, by default. |
49 # TODO(epoger): Is there a way to compute this intelligently? To some extent | 49 # TODO(epoger): Is there a way to compute this intelligently? To some extent |
50 # it is a function of how many cores are on the machine, and how many other | 50 # it is a function of how many cores are on the machine, and how many other |
51 # processes it is running; but it's probably more a function of how much time | 51 # processes it is running; but it's probably more a function of how much time |
52 # each core sits idle waiting for network I/O to complete. | 52 # each core sits idle waiting for network I/O to complete. |
53 DEFAULT_UPLOAD_THREADS = 10 | 53 DEFAULT_UPLOAD_THREADS = 10 |
54 | 54 |
| 55 _GS_PREFIX = 'gs://' |
| 56 |
55 | 57 |
56 class AnonymousGSConnection(GSConnection): | 58 class AnonymousGSConnection(GSConnection): |
57 """GSConnection class that allows anonymous connections. | 59 """GSConnection class that allows anonymous connections. |
58 | 60 |
59 The GSConnection class constructor in | 61 The GSConnection class constructor in |
60 https://github.com/boto/boto/blob/develop/boto/gs/connection.py doesn't allow | 62 https://github.com/boto/boto/blob/develop/boto/gs/connection.py doesn't allow |
61 for anonymous connections (connections without credentials), so we have to | 63 for anonymous connections (connections without credentials), so we have to |
62 override it. | 64 override it. |
63 """ | 65 """ |
64 def __init__(self): | 66 def __init__(self): |
(...skipping 508 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
573 dirs = [] | 575 dirs = [] |
574 files = [] | 576 files = [] |
575 for item in items: | 577 for item in items: |
576 t = type(item) | 578 t = type(item) |
577 if t is Key: | 579 if t is Key: |
578 files.append(item.name[prefix_length:]) | 580 files.append(item.name[prefix_length:]) |
579 elif t is Prefix: | 581 elif t is Prefix: |
580 dirs.append(item.name[prefix_length:-1]) | 582 dirs.append(item.name[prefix_length:-1]) |
581 return (dirs, files) | 583 return (dirs, files) |
582 | 584 |
@staticmethod
def is_gs_url(url):
    """Tells whether url looks like a Google Storage URL ("gs://bucket/file").

    The test performed is: url must begin with the "gs://" prefix (compared
    case-insensitively), must contain at least one character after that
    prefix, and that first character must be alphanumeric.  Nothing beyond
    that first character is inspected.

    Objects that lack the string methods used here (e.g. None) are reported
    as False rather than raising.
    """
    prefix_length = len(_GS_PREFIX)
    try:
        # Keep the lower() call first: it is what turns a non-string input
        # into an AttributeError, which is reported as "not a GS URL" below.
        has_prefix = url.lower().startswith(_GS_PREFIX)
        if not has_prefix or len(url) <= prefix_length:
            return False
        return url[prefix_length].isalnum()
    except AttributeError:
        return False
| 596 |
@staticmethod
def split_gs_url(url):
    """Splits a Google Storage URL into a (bucket, filepath) tuple.

    The bucket is everything between the "gs://" prefix and the first '/'.
    The filepath is whatever follows that '/', with leading and trailing
    slashes stripped; it is '' when the URL contains no '/' after the bucket.

    Raises AttributeError if GSUtils.is_gs_url() rejects the URL.
    """
    if GSUtils.is_gs_url(url):
        # partition() yields an empty filepath when there is no '/' at all,
        # which covers the bucket-only URL case.
        bucket, _, filepath = url[len(_GS_PREFIX):].partition('/')
        return (bucket, filepath.strip('/'))
    raise AttributeError('"%s" is not a legal Google Storage URL' % url)
| 612 |
583 def _connect_to_bucket(self, bucket): | 613 def _connect_to_bucket(self, bucket): |
584 """Returns a Bucket object we can use to access a particular bucket in GS. | 614 """Returns a Bucket object we can use to access a particular bucket in GS. |
585 | 615 |
586 Params: | 616 Params: |
587 bucket: name of the bucket (e.g., 'chromium-skia-gm'), or a Bucket | 617 bucket: name of the bucket (e.g., 'chromium-skia-gm'), or a Bucket |
588 object (in which case this param is just returned as-is) | 618 object (in which case this param is just returned as-is) |
589 """ | 619 """ |
590 if type(bucket) is Bucket: | 620 if type(bucket) is Bucket: |
591 return bucket | 621 return bucket |
592 try: | 622 try: |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
645 | 675 |
def _get_local_md5(path):
    """Returns the hex-encoded MD5 digest of the file at path on local disk.

    The file is opened in binary mode and consumed in 64 KiB chunks, so the
    whole file is never held in memory at once.
    """
    chunk_size = 64 * 1024
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        # read() returns an empty bytes object at EOF, which ends the loop.
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
OLD | NEW |