Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(148)

Side by Side Diff: py/utils/gs_utils.py

Issue 661563002: Add does_storage_object_exist to gs_utils (Closed) Base URL: https://skia.googlesource.com/common.git@master
Patch Set: Work harder to find .boto Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 #!/usr/bin/python 1 #!/usr/bin/python
2 2
3 # pylint: disable=C0301 3 # pylint: disable=C0301
4 """ 4 """
5 Copyright 2014 Google Inc. 5 Copyright 2014 Google Inc.
6 6
7 Use of this source code is governed by a BSD-style license that can be 7 Use of this source code is governed by a BSD-style license that can be
8 found in the LICENSE file. 8 found in the LICENSE file.
9 9
10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
45 from boto.s3.connection import SubdomainCallingFormat 45 from boto.s3.connection import SubdomainCallingFormat
46 from boto.s3.prefix import Prefix 46 from boto.s3.prefix import Prefix
47 47
48 # How many files to upload at once, by default. 48 # How many files to upload at once, by default.
49 # TODO(epoger): Is there a way to compute this intelligently? To some extent 49 # TODO(epoger): Is there a way to compute this intelligently? To some extent
50 # it is a function of how many cores are on the machine, and how many other 50 # it is a function of how many cores are on the machine, and how many other
51 # processes it is running; but it's probably more a function of how much time 51 # processes it is running; but it's probably more a function of how much time
52 # each core sits idle waiting for network I/O to complete. 52 # each core sits idle waiting for network I/O to complete.
53 DEFAULT_UPLOAD_THREADS = 10 53 DEFAULT_UPLOAD_THREADS = 10
54 54
55 _GS_PREFIX = 'gs://' 55 GS_PREFIX = 'gs://'
56 56
57 57
58 class AnonymousGSConnection(GSConnection): 58 class AnonymousGSConnection(GSConnection):
59 """GSConnection class that allows anonymous connections. 59 """GSConnection class that allows anonymous connections.
60 60
61 The GSConnection class constructor in 61 The GSConnection class constructor in
62 https://github.com/boto/boto/blob/develop/boto/gs/connection.py doesn't allow 62 https://github.com/boto/boto/blob/develop/boto/gs/connection.py doesn't allow
63 for anonymous connections (connections without credentials), so we have to 63 for anonymous connections (connections without credentials), so we have to
64 override it. 64 override it.
65 """ 65 """
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
125 IF_NEW = 2 # if there is an existing file with the same name, 125 IF_NEW = 2 # if there is an existing file with the same name,
126 # leave it alone 126 # leave it alone
127 IF_MODIFIED = 3 # if there is an existing file with the same name and 127 IF_MODIFIED = 3 # if there is an existing file with the same name and
128 # contents, leave it alone 128 # contents, leave it alone
129 129
130 def __init__(self, boto_file_path=None): 130 def __init__(self, boto_file_path=None):
131 """Constructor. 131 """Constructor.
132 132
133 Params: 133 Params:
134 boto_file_path: full path (local-OS-style) on local disk where .boto 134 boto_file_path: full path (local-OS-style) on local disk where .boto
135 credentials file can be found. If None, then the GSUtils object 135 credentials file can be found. If None, fall back on the
136 created will be able to access only public files in Google Storage. 136 AWS_CREDENTIAL_FILE environment variable, then look in a set of
137 common paths for the .boto file. If no .boto file is found, then the
138 GSUtils object created will be able to access only public files in
139 Google Storage.
137 140
138 Raises an exception if no file is found at boto_file_path, or if the file 141 Raises an exception if no file is found at boto_file_path, or if the file
139 found there is malformed. 142 found there is malformed.
140 """ 143 """
141 self._gs_access_key_id = None 144 self._gs_access_key_id = None
142 self._gs_secret_access_key = None 145 self._gs_secret_access_key = None
146 if not boto_file_path:
147 if os.environ.get('AWS_CREDENTIAL_FILE'):
148 boto_file_path = os.path.expanduser(os.environ['AWS_CREDENTIAL_FILE'])
149 if not boto_file_path:
rmistry 2014/10/16 18:09:58 Remove this `if`, since you already have it above.
borenet 2014/10/16 18:12:51 This is only intended to run if AWS_CREDENTIAL_FILE is not set [comment truncated in this view]
150 for path in (os.path.join(os.path.expanduser('~'), '.boto'),):
151 if os.path.isfile(path):
152 boto_file_path = path
153 break
154
143 if boto_file_path: 155 if boto_file_path:
144 print ('Reading boto file from %s' % boto_file_path) 156 print ('Reading boto file from %s' % boto_file_path)
145 boto_dict = _config_file_as_dict(filepath=boto_file_path) 157 boto_dict = _config_file_as_dict(filepath=boto_file_path)
146 self._gs_access_key_id = boto_dict['gs_access_key_id'] 158 self._gs_access_key_id = boto_dict['gs_access_key_id']
147 self._gs_secret_access_key = boto_dict['gs_secret_access_key'] 159 self._gs_secret_access_key = boto_dict['gs_secret_access_key']
160 else:
161 print >> sys.stderr, 'Warning: no .boto file found.'
162
148 # Which field we get/set in ACL entries, depending on IdType. 163 # Which field we get/set in ACL entries, depending on IdType.
149 self._field_by_id_type = { 164 self._field_by_id_type = {
150 self.IdType.GROUP_BY_DOMAIN: 'domain', 165 self.IdType.GROUP_BY_DOMAIN: 'domain',
151 self.IdType.GROUP_BY_EMAIL: 'email_address', 166 self.IdType.GROUP_BY_EMAIL: 'email_address',
152 self.IdType.GROUP_BY_ID: 'id', 167 self.IdType.GROUP_BY_ID: 'id',
153 self.IdType.USER_BY_EMAIL: 'email_address', 168 self.IdType.USER_BY_EMAIL: 'email_address',
154 self.IdType.USER_BY_ID: 'id', 169 self.IdType.USER_BY_ID: 'id',
155 } 170 }
156 171
157 def delete_file(self, bucket, path): 172 def delete_file(self, bucket, path):
(...skipping 417 matching lines...) Expand 10 before | Expand all | Expand 10 after
575 dirs = [] 590 dirs = []
576 files = [] 591 files = []
577 for item in items: 592 for item in items:
578 t = type(item) 593 t = type(item)
579 if t is Key: 594 if t is Key:
580 files.append(item.name[prefix_length:]) 595 files.append(item.name[prefix_length:])
581 elif t is Prefix: 596 elif t is Prefix:
582 dirs.append(item.name[prefix_length:-1]) 597 dirs.append(item.name[prefix_length:-1])
583 return (dirs, files) 598 return (dirs, files)
584 599
600 def does_storage_object_exist(self, bucket, object_name):
601 """Determines whether an object exists in Google Storage.
602
603 Returns True if it exists else returns False.
604 """
605 b = self._connect_to_bucket(bucket=bucket)
606 if object_name in b:
607 return True
608 dirs, files = self.list_bucket_contents(bucket, object_name)
609 return bool(dirs or files)
610
585 @staticmethod 611 @staticmethod
586 def is_gs_url(url): 612 def is_gs_url(url):
587 """Returns True if url is a legal Google Storage URL ("gs://bucket/file"). 613 """Returns True if url is a legal Google Storage URL ("gs://bucket/file").
588 """ 614 """
589 try: 615 try:
590 if url.lower().startswith(_GS_PREFIX) and len(url) > len(_GS_PREFIX): 616 if url.lower().startswith(GS_PREFIX) and len(url) > len(GS_PREFIX):
591 return url[len(_GS_PREFIX)].isalnum() 617 return url[len(GS_PREFIX)].isalnum()
592 else: 618 else:
593 return False 619 return False
594 except AttributeError: 620 except AttributeError:
595 return False 621 return False
596 622
597 @staticmethod 623 @staticmethod
598 def split_gs_url(url): 624 def split_gs_url(url):
599 """Returns (bucket, filepath) corresponding to a legal Google Storage URL. 625 """Returns (bucket, filepath) corresponding to a legal Google Storage URL.
600 626
601 Raises AttributeError if the input URL is not a legal Google Storage URL. 627 Raises AttributeError if the input URL is not a legal Google Storage URL.
602 """ 628 """
603 if not GSUtils.is_gs_url(url): 629 if not GSUtils.is_gs_url(url):
604 raise AttributeError('"%s" is not a legal Google Storage URL' % url) 630 raise AttributeError('"%s" is not a legal Google Storage URL' % url)
605 prefix_removed = url[len(_GS_PREFIX):] 631 prefix_removed = url[len(GS_PREFIX):]
606 pathsep_index = prefix_removed.find('/') 632 pathsep_index = prefix_removed.find('/')
607 if pathsep_index < 0: 633 if pathsep_index < 0:
608 return (prefix_removed, '') 634 return (prefix_removed, '')
609 else: 635 else:
610 return (prefix_removed[:pathsep_index], 636 return (prefix_removed[:pathsep_index],
611 prefix_removed[pathsep_index+1:].strip('/')) 637 prefix_removed[pathsep_index+1:].strip('/'))
612 638
613 def _connect_to_bucket(self, bucket): 639 def _connect_to_bucket(self, bucket):
614 """Returns a Bucket object we can use to access a particular bucket in GS. 640 """Returns a Bucket object we can use to access a particular bucket in GS.
615 641
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
675 701
676 def _get_local_md5(path): 702 def _get_local_md5(path):
677 """Returns the MD5 hash of a file on local disk.""" 703 """Returns the MD5 hash of a file on local disk."""
678 hasher = hashlib.md5() 704 hasher = hashlib.md5()
679 with open(path, 'rb') as f: 705 with open(path, 'rb') as f:
680 while True: 706 while True:
681 data = f.read(64*1024) 707 data = f.read(64*1024)
682 if not data: 708 if not data:
683 return hasher.hexdigest() 709 return hasher.hexdigest()
684 hasher.update(data) 710 hasher.update(data)
OLDNEW
« .gitignore ('K') | « .gitignore ('k') | py/utils/gs_utils_manualtest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698