OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
4 """ | 4 """ |
5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
6 | 6 |
7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
8 found in the LICENSE file. | 8 found in the LICENSE file. |
9 | 9 |
10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
(...skipping 19 matching lines...) Expand all Loading... |
30 TRUNK_DIRECTORY = os.path.abspath(os.path.join( | 30 TRUNK_DIRECTORY = os.path.abspath(os.path.join( |
31 os.path.dirname(__file__), os.pardir, os.pardir)) | 31 os.path.dirname(__file__), os.pardir, os.pardir)) |
32 for import_subdir in ['boto']: | 32 for import_subdir in ['boto']: |
33 import_dirpath = os.path.join( | 33 import_dirpath = os.path.join( |
34 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) | 34 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) |
35 if import_dirpath not in sys.path: | 35 if import_dirpath not in sys.path: |
36 # We need to insert at the beginning of the path, to make sure that our | 36 # We need to insert at the beginning of the path, to make sure that our |
37 # imported versions are favored over others that might be in the path. | 37 # imported versions are favored over others that might be in the path. |
38 sys.path.insert(0, import_dirpath) | 38 sys.path.insert(0, import_dirpath) |
39 from boto.gs import acl | 39 from boto.gs import acl |
| 40 from boto.gs.bucket import Bucket |
40 from boto.gs.connection import GSConnection | 41 from boto.gs.connection import GSConnection |
41 from boto.gs.key import Key | 42 from boto.gs.key import Key |
42 from boto.s3.bucketlistresultset import BucketListResultSet | 43 from boto.s3.bucketlistresultset import BucketListResultSet |
| 44 from boto.s3.connection import SubdomainCallingFormat |
43 from boto.s3.prefix import Prefix | 45 from boto.s3.prefix import Prefix |
44 | 46 |
45 # Permissions that may be set on each file in Google Storage. | 47 # Permissions that may be set on each file in Google Storage. |
46 # See SupportedPermissions in | 48 # See SupportedPermissions in |
47 # https://github.com/boto/boto/blob/develop/boto/gs/acl.py | 49 # https://github.com/boto/boto/blob/develop/boto/gs/acl.py |
48 PERMISSION_NONE = None | 50 PERMISSION_NONE = None |
49 PERMISSION_OWNER = 'FULL_CONTROL' | 51 PERMISSION_OWNER = 'FULL_CONTROL' |
50 PERMISSION_READ = 'READ' | 52 PERMISSION_READ = 'READ' |
51 PERMISSION_WRITE = 'WRITE' | 53 PERMISSION_WRITE = 'WRITE' |
52 | 54 |
53 # Types of identifiers we can use to set ACLs. | 55 # Types of identifiers we can use to set ACLs. |
54 ID_TYPE_GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN | 56 ID_TYPE_GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN |
55 ID_TYPE_GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL | 57 ID_TYPE_GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL |
56 ID_TYPE_GROUP_BY_ID = acl.GROUP_BY_ID | 58 ID_TYPE_GROUP_BY_ID = acl.GROUP_BY_ID |
57 ID_TYPE_USER_BY_EMAIL = acl.USER_BY_EMAIL | 59 ID_TYPE_USER_BY_EMAIL = acl.USER_BY_EMAIL |
58 ID_TYPE_USER_BY_ID = acl.USER_BY_ID | 60 ID_TYPE_USER_BY_ID = acl.USER_BY_ID |
59 | 61 |
60 # Which field we get/set in ACL entries, depending on ID_TYPE. | 62 # Which field we get/set in ACL entries, depending on ID_TYPE. |
61 FIELD_BY_ID_TYPE = { | 63 FIELD_BY_ID_TYPE = { |
62 ID_TYPE_GROUP_BY_DOMAIN: 'domain', | 64 ID_TYPE_GROUP_BY_DOMAIN: 'domain', |
63 ID_TYPE_GROUP_BY_EMAIL: 'email_address', | 65 ID_TYPE_GROUP_BY_EMAIL: 'email_address', |
64 ID_TYPE_GROUP_BY_ID: 'id', | 66 ID_TYPE_GROUP_BY_ID: 'id', |
65 ID_TYPE_USER_BY_EMAIL: 'email_address', | 67 ID_TYPE_USER_BY_EMAIL: 'email_address', |
66 ID_TYPE_USER_BY_ID: 'id', | 68 ID_TYPE_USER_BY_ID: 'id', |
67 } | 69 } |
68 | 70 |
69 | 71 |
| 72 class AnonymousGSConnection(GSConnection): |
| 73 """GSConnection class that allows anonymous connections. |
| 74 |
| 75 The GSConnection class constructor in |
| 76 https://github.com/boto/boto/blob/develop/boto/gs/connection.py doesn't allow |
| 77 for anonymous connections (connections without credentials), so we have to |
| 78 override it. |
| 79 """ |
| 80 def __init__(self): |
| 81 super(GSConnection, self).__init__( |
| 82 # This is the important bit we need to add... |
| 83 anon=True, |
| 84 # ...and these are just copied in from GSConnection.__init__() |
| 85 bucket_class=Bucket, |
| 86 calling_format=SubdomainCallingFormat(), |
| 87 host=GSConnection.DefaultHost, |
| 88 provider='google') |
| 89 |
| 90 |
70 class GSUtils(object): | 91 class GSUtils(object): |
71 """Utilities for accessing Google Cloud Storage, using the boto library.""" | 92 """Utilities for accessing Google Cloud Storage, using the boto library.""" |
72 | 93 |
73 def __init__(self, boto_file_path=os.path.join('~','.boto')): | 94 def __init__(self, boto_file_path=None): |
74 """Constructor. | 95 """Constructor. |
75 | 96 |
76 Params: | 97 Params: |
77 boto_file_path: full path (local-OS-style) on local disk where .boto | 98 boto_file_path: full path (local-OS-style) on local disk where .boto |
78 credentials file can be found. An exception is thrown if this file | 99 credentials file can be found. If None, then the GSUtils object |
79 is missing. | 100 created will be able to access only public files in Google Storage. |
80 TODO(epoger): Change missing-file behavior: allow the caller to | 101 |
81 operate on public files in Google Storage. | 102 Raises an exception if no file is found at boto_file_path, or if the file |
| 103 found there is malformed. |
82 """ | 104 """ |
83 boto_file_path = os.path.expanduser(boto_file_path) | 105 self._gs_access_key_id = None |
84 print 'Reading boto file from %s' % boto_file_path | 106 self._gs_secret_access_key = None |
85 boto_dict = _config_file_as_dict(filepath=boto_file_path) | 107 if boto_file_path: |
86 self._gs_access_key_id = boto_dict['gs_access_key_id'] | 108 print 'Reading boto file from %s' % boto_file_path |
87 self._gs_secret_access_key = boto_dict['gs_secret_access_key'] | 109 boto_dict = _config_file_as_dict(filepath=boto_file_path) |
| 110 self._gs_access_key_id = boto_dict['gs_access_key_id'] |
| 111 self._gs_secret_access_key = boto_dict['gs_secret_access_key'] |
88 | 112 |
89 def delete_file(self, bucket, path): | 113 def delete_file(self, bucket, path): |
90 """Delete a single file within a GS bucket. | 114 """Delete a single file within a GS bucket. |
91 | 115 |
92 TODO(epoger): what if bucket or path does not exist? Should probably raise | 116 TODO(epoger): what if bucket or path does not exist? Should probably raise |
93 an exception. Implement, and add a test to exercise this. | 117 an exception. Implement, and add a test to exercise this. |
94 | 118 |
95 Params: | 119 Params: |
96 bucket: GS bucket to delete a file from | 120 bucket: GS bucket to delete a file from |
97 path: full path (Posix-style) of the file within the bucket to delete | 121 path: full path (Posix-style) of the file within the bucket to delete |
(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
251 for item in lister: | 275 for item in lister: |
252 t = type(item) | 276 t = type(item) |
253 if t is Key: | 277 if t is Key: |
254 files.append(item.key[prefix_length:]) | 278 files.append(item.key[prefix_length:]) |
255 elif t is Prefix: | 279 elif t is Prefix: |
256 dirs.append(item.name[prefix_length:-1]) | 280 dirs.append(item.name[prefix_length:-1]) |
257 return (dirs, files) | 281 return (dirs, files) |
258 | 282 |
259 def _create_connection(self): | 283 def _create_connection(self): |
260 """Returns a GSConnection object we can use to access Google Storage.""" | 284 """Returns a GSConnection object we can use to access Google Storage.""" |
261 return GSConnection( | 285 if self._gs_access_key_id: |
262 gs_access_key_id=self._gs_access_key_id, | 286 return GSConnection( |
263 gs_secret_access_key=self._gs_secret_access_key) | 287 gs_access_key_id=self._gs_access_key_id, |
264 | 288 gs_secret_access_key=self._gs_secret_access_key) |
| 289 else: |
| 290 return AnonymousGSConnection() |
265 | 291 |
266 def _config_file_as_dict(filepath): | 292 def _config_file_as_dict(filepath): |
267 """Reads a boto-style config file into a dict. | 293 """Reads a boto-style config file into a dict. |
268 | 294 |
269 Parses all lines from the file of this form: key = value | 295 Parses all lines from the file of this form: key = value |
270 TODO(epoger): Create unittest. | 296 TODO(epoger): Create unittest. |
271 | 297 |
272 Params: | 298 Params: |
273 filepath: path to config file on local disk | 299 filepath: path to config file on local disk |
274 | 300 |
(...skipping 19 matching lines...) Expand all Loading... |
294 Args: | 320 Args: |
295 path: full path of directory to create | 321 path: full path of directory to create |
296 """ | 322 """ |
297 try: | 323 try: |
298 os.makedirs(path) | 324 os.makedirs(path) |
299 except OSError as e: | 325 except OSError as e: |
300 if e.errno != errno.EEXIST: | 326 if e.errno != errno.EEXIST: |
301 raise | 327 raise |
302 | 328 |
303 | 329 |
304 def _run_self_test(): | 330 def _test_public_read(): |
| 331 """Make sure we can read from public files without .boto file credentials.""" |
| 332 gs = GSUtils() |
| 333 gs.list_bucket_contents(bucket='chromium-skia-gm-summaries', subdir=None) |
| 334 |
| 335 |
| 336 def _test_authenticated_round_trip(): |
| 337 try: |
| 338 gs = GSUtils(boto_file_path=os.path.expanduser(os.path.join('~','.boto'))) |
| 339 except: |
| 340 print """ |
| 341 Failed to instantiate GSUtils object with default .boto file path. |
| 342 Do you have a ~/.boto file that provides the credentials needed to read |
| 343 and write gs://chromium-skia-gm ? |
| 344 """ |
| 345 raise |
| 346 |
305 bucket = 'chromium-skia-gm' | 347 bucket = 'chromium-skia-gm' |
306 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) | 348 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) |
307 subdir = 'subdir' | 349 subdir = 'subdir' |
308 filenames_to_upload = ['file1', 'file2'] | 350 filenames_to_upload = ['file1', 'file2'] |
309 gs = GSUtils() | |
310 | 351 |
311 # Upload test files to Google Storage. | 352 # Upload test files to Google Storage. |
312 local_src_dir = tempfile.mkdtemp() | 353 local_src_dir = tempfile.mkdtemp() |
313 os.mkdir(os.path.join(local_src_dir, subdir)) | 354 os.mkdir(os.path.join(local_src_dir, subdir)) |
314 try: | 355 try: |
315 for filename in filenames_to_upload: | 356 for filename in filenames_to_upload: |
316 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: | 357 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: |
317 f.write('contents of %s\n' % filename) | 358 f.write('contents of %s\n' % filename) |
318 gs.upload_file(source_path=os.path.join(local_src_dir, subdir, filename), | 359 gs.upload_file(source_path=os.path.join(local_src_dir, subdir, filename), |
319 dest_bucket=bucket, | 360 dest_bucket=bucket, |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
386 gs.delete_file(bucket=bucket, | 427 gs.delete_file(bucket=bucket, |
387 path=posixpath.join(remote_dir, subdir, filename)) | 428 path=posixpath.join(remote_dir, subdir, filename)) |
388 | 429 |
389 # Confirm that we deleted all the files we uploaded to Google Storage. | 430 # Confirm that we deleted all the files we uploaded to Google Storage. |
390 (dirs, files) = gs.list_bucket_contents( | 431 (dirs, files) = gs.list_bucket_contents( |
391 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) | 432 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |
392 assert dirs == [], '%s == []' % dirs | 433 assert dirs == [], '%s == []' % dirs |
393 assert files == [], '%s == []' % files | 434 assert files == [], '%s == []' % files |
394 | 435 |
395 | 436 |
396 # TODO(epoger): How should we exercise this self-test? | 437 # TODO(epoger): How should we exercise these self-tests? |
397 # I avoided using the standard unittest framework, because these Google Storage | 438 # See http://skbug.com/2751 |
398 # operations are expensive and require .boto permissions. | |
399 # | |
400 # How can we automatically test this code without wasting too many resources | |
401 # or needing .boto permissions? | |
402 if __name__ == '__main__': | 439 if __name__ == '__main__': |
403 _run_self_test() | 440 _test_public_read() |
| 441 _test_authenticated_round_trip() |
| 442 # TODO(epoger): Add _test_unauthenticated_access() to make sure we raise |
| 443 # an exception when we try to access without needed credentials. |
OLD | NEW |