OLD | NEW |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
4 """ | 4 """ |
5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
6 | 6 |
7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
8 found in the LICENSE file. | 8 found in the LICENSE file. |
9 | 9 |
10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
(...skipping 19 matching lines...) | |
30 TRUNK_DIRECTORY = os.path.abspath(os.path.join( | 30 TRUNK_DIRECTORY = os.path.abspath(os.path.join( |
31 os.path.dirname(__file__), os.pardir, os.pardir)) | 31 os.path.dirname(__file__), os.pardir, os.pardir)) |
32 for import_subdir in ['boto']: | 32 for import_subdir in ['boto']: |
33 import_dirpath = os.path.join( | 33 import_dirpath = os.path.join( |
34 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) | 34 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) |
35 if import_dirpath not in sys.path: | 35 if import_dirpath not in sys.path: |
36 # We need to insert at the beginning of the path, to make sure that our | 36 # We need to insert at the beginning of the path, to make sure that our |
37 # imported versions are favored over others that might be in the path. | 37 # imported versions are favored over others that might be in the path. |
38 sys.path.insert(0, import_dirpath) | 38 sys.path.insert(0, import_dirpath) |
39 from boto.gs import acl | 39 from boto.gs import acl |
40 from boto.gs.bucket import Bucket | |
40 from boto.gs.connection import GSConnection | 41 from boto.gs.connection import GSConnection |
41 from boto.gs.key import Key | 42 from boto.gs.key import Key |
42 from boto.s3.bucketlistresultset import BucketListResultSet | 43 from boto.s3.bucketlistresultset import BucketListResultSet |
44 from boto.s3.connection import SubdomainCallingFormat | |
43 from boto.s3.prefix import Prefix | 45 from boto.s3.prefix import Prefix |
44 | 46 |
45 # Permissions that may be set on each file in Google Storage. | 47 # Permissions that may be set on each file in Google Storage. |
46 # See SupportedPermissions in | 48 # See SupportedPermissions in |
47 # https://github.com/boto/boto/blob/develop/boto/gs/acl.py | 49 # https://github.com/boto/boto/blob/develop/boto/gs/acl.py |
48 PERMISSION_NONE = None | 50 PERMISSION_NONE = None |
49 PERMISSION_OWNER = 'FULL_CONTROL' | 51 PERMISSION_OWNER = 'FULL_CONTROL' |
50 PERMISSION_READ = 'READ' | 52 PERMISSION_READ = 'READ' |
51 PERMISSION_WRITE = 'WRITE' | 53 PERMISSION_WRITE = 'WRITE' |
52 | 54 |
53 # Types of identifiers we can use to set ACLs. | 55 # Types of identifiers we can use to set ACLs. |
54 ID_TYPE_GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN | 56 ID_TYPE_GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN |
55 ID_TYPE_GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL | 57 ID_TYPE_GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL |
56 ID_TYPE_GROUP_BY_ID = acl.GROUP_BY_ID | 58 ID_TYPE_GROUP_BY_ID = acl.GROUP_BY_ID |
57 ID_TYPE_USER_BY_EMAIL = acl.USER_BY_EMAIL | 59 ID_TYPE_USER_BY_EMAIL = acl.USER_BY_EMAIL |
58 ID_TYPE_USER_BY_ID = acl.USER_BY_ID | 60 ID_TYPE_USER_BY_ID = acl.USER_BY_ID |
59 | 61 |
60 # Which field we get/set in ACL entries, depending on ID_TYPE. | 62 # Which field we get/set in ACL entries, depending on ID_TYPE. |
61 FIELD_BY_ID_TYPE = { | 63 FIELD_BY_ID_TYPE = { |
62 ID_TYPE_GROUP_BY_DOMAIN: 'domain', | 64 ID_TYPE_GROUP_BY_DOMAIN: 'domain', |
63 ID_TYPE_GROUP_BY_EMAIL: 'email_address', | 65 ID_TYPE_GROUP_BY_EMAIL: 'email_address', |
64 ID_TYPE_GROUP_BY_ID: 'id', | 66 ID_TYPE_GROUP_BY_ID: 'id', |
65 ID_TYPE_USER_BY_EMAIL: 'email_address', | 67 ID_TYPE_USER_BY_EMAIL: 'email_address', |
66 ID_TYPE_USER_BY_ID: 'id', | 68 ID_TYPE_USER_BY_ID: 'id', |
67 } | 69 } |
68 | 70 |
69 | 71 |
72 class AnonymousGSConnection(GSConnection): | |
73 """The GSConnection class constructor in | |
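[review comment by rmistry, 2014/07/16 13:55:01] The Nittiest of Nits: The style guide says to have ... [comment truncated in capture]
[review comment by epoger, 2014/07/16 14:02:02] I'm not the biggest fan of that directive in the s... [comment truncated in capture]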
| |
74 https://github.com/boto/boto/blob/develop/boto/gs/connection.py doesn't allow | |
75 for anonymous connections (connections without credentials), so we have to | |
76 override it. | |
77 """ | |
78 def __init__(self): | |
79 super(GSConnection, self).__init__( | |
80 # This is the important bit we need to add... | |
81 anon=True, | |
82 # ...and these are just copied in from GSConnection.__init__() | |
83 bucket_class=Bucket, | |
84 calling_format=SubdomainCallingFormat(), | |
85 host=GSConnection.DefaultHost, | |
86 provider='google') | |
87 | |
88 | |
70 class GSUtils(object): | 89 class GSUtils(object): |
71 """Utilities for accessing Google Cloud Storage, using the boto library.""" | 90 """Utilities for accessing Google Cloud Storage, using the boto library.""" |
72 | 91 |
73 def __init__(self, boto_file_path=os.path.join('~','.boto')): | 92 def __init__(self, boto_file_path=None): |
74 """Constructor. | 93 """Constructor. |
75 | 94 |
76 Params: | 95 Params: |
77 boto_file_path: full path (local-OS-style) on local disk where .boto | 96 boto_file_path: full path (local-OS-style) on local disk where .boto |
78 credentials file can be found. An exception is thrown if this file | 97 credentials file can be found. If None, then the GSUtils object |
79 is missing. | 98 created will be able to access only public files in Google Storage. |
80 TODO(epoger): Change missing-file behavior: allow the caller to | 99 |
81 operate on public files in Google Storage. | 100 Raises an exception if no file is found at boto_file_path, or if the file |
101 found there is malformed. | |
82 """ | 102 """ |
83 boto_file_path = os.path.expanduser(boto_file_path) | 103 self._gs_access_key_id = None |
84 print 'Reading boto file from %s' % boto_file_path | 104 self._gs_secret_access_key = None |
85 boto_dict = _config_file_as_dict(filepath=boto_file_path) | 105 if boto_file_path: |
86 self._gs_access_key_id = boto_dict['gs_access_key_id'] | 106 print 'Reading boto file from %s' % boto_file_path |
87 self._gs_secret_access_key = boto_dict['gs_secret_access_key'] | 107 boto_dict = _config_file_as_dict(filepath=boto_file_path) |
108 self._gs_access_key_id = boto_dict['gs_access_key_id'] | |
109 self._gs_secret_access_key = boto_dict['gs_secret_access_key'] | |
88 | 110 |
89 def delete_file(self, bucket, path): | 111 def delete_file(self, bucket, path): |
90 """Delete a single file within a GS bucket. | 112 """Delete a single file within a GS bucket. |
91 | 113 |
92 TODO(epoger): what if bucket or path does not exist? Should probably raise | 114 TODO(epoger): what if bucket or path does not exist? Should probably raise |
93 an exception. Implement, and add a test to exercise this. | 115 an exception. Implement, and add a test to exercise this. |
94 | 116 |
95 Params: | 117 Params: |
96 bucket: GS bucket to delete a file from | 118 bucket: GS bucket to delete a file from |
97 path: full path (Posix-style) of the file within the bucket to delete | 119 path: full path (Posix-style) of the file within the bucket to delete |
(...skipping 153 matching lines...) | |
251 for item in lister: | 273 for item in lister: |
252 t = type(item) | 274 t = type(item) |
253 if t is Key: | 275 if t is Key: |
254 files.append(item.key[prefix_length:]) | 276 files.append(item.key[prefix_length:]) |
255 elif t is Prefix: | 277 elif t is Prefix: |
256 dirs.append(item.name[prefix_length:-1]) | 278 dirs.append(item.name[prefix_length:-1]) |
257 return (dirs, files) | 279 return (dirs, files) |
258 | 280 |
259 def _create_connection(self): | 281 def _create_connection(self): |
260 """Returns a GSConnection object we can use to access Google Storage.""" | 282 """Returns a GSConnection object we can use to access Google Storage.""" |
261 return GSConnection( | 283 if self._gs_access_key_id: |
262 gs_access_key_id=self._gs_access_key_id, | 284 return GSConnection( |
263 gs_secret_access_key=self._gs_secret_access_key) | 285 gs_access_key_id=self._gs_access_key_id, |
264 | 286 gs_secret_access_key=self._gs_secret_access_key) |
287 else: | |
288 return AnonymousGSConnection() | |
265 | 289 |
266 def _config_file_as_dict(filepath): | 290 def _config_file_as_dict(filepath): |
267 """Reads a boto-style config file into a dict. | 291 """Reads a boto-style config file into a dict. |
268 | 292 |
269 Parses all lines from the file of this form: key = value | 293 Parses all lines from the file of this form: key = value |
270 TODO(epoger): Create unittest. | 294 TODO(epoger): Create unittest. |
271 | 295 |
272 Params: | 296 Params: |
273 filepath: path to config file on local disk | 297 filepath: path to config file on local disk |
274 | 298 |
(...skipping 19 matching lines...) | |
294 Args: | 318 Args: |
295 path: full path of directory to create | 319 path: full path of directory to create |
296 """ | 320 """ |
297 try: | 321 try: |
298 os.makedirs(path) | 322 os.makedirs(path) |
299 except OSError as e: | 323 except OSError as e: |
300 if e.errno != errno.EEXIST: | 324 if e.errno != errno.EEXIST: |
301 raise | 325 raise |
302 | 326 |
303 | 327 |
304 def _run_self_test(): | 328 def _test_public_read(): |
329 """Make sure we can read from public files without .boto file credentials.""" | |
330 gs = GSUtils() | |
331 gs.list_bucket_contents(bucket='chromium-skia-gm-summaries', subdir=None) | |
332 | |
333 | |
334 def _test_authenticated_round_trip(): | |
335 try: | |
336 gs = GSUtils(boto_file_path=os.path.expanduser(os.path.join('~','.boto'))) | |
337 except: | |
338 print """ | |
339 Failed to instantiate GSUtils object with default .boto file path. | |
340 Do you have a ~/.boto file that provides the credentials needed to read | |
341 and write gs://chromium-skia-gm ? | |
342 """ | |
343 raise | |
344 | |
305 bucket = 'chromium-skia-gm' | 345 bucket = 'chromium-skia-gm' |
306 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) | 346 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) |
307 subdir = 'subdir' | 347 subdir = 'subdir' |
308 filenames_to_upload = ['file1', 'file2'] | 348 filenames_to_upload = ['file1', 'file2'] |
309 gs = GSUtils() | |
310 | 349 |
311 # Upload test files to Google Storage. | 350 # Upload test files to Google Storage. |
312 local_src_dir = tempfile.mkdtemp() | 351 local_src_dir = tempfile.mkdtemp() |
313 os.mkdir(os.path.join(local_src_dir, subdir)) | 352 os.mkdir(os.path.join(local_src_dir, subdir)) |
314 try: | 353 try: |
315 for filename in filenames_to_upload: | 354 for filename in filenames_to_upload: |
316 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: | 355 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: |
317 f.write('contents of %s\n' % filename) | 356 f.write('contents of %s\n' % filename) |
318 gs.upload_file(source_path=os.path.join(local_src_dir, subdir, filename), | 357 gs.upload_file(source_path=os.path.join(local_src_dir, subdir, filename), |
319 dest_bucket=bucket, | 358 dest_bucket=bucket, |
(...skipping 66 matching lines...) | |
386 gs.delete_file(bucket=bucket, | 425 gs.delete_file(bucket=bucket, |
387 path=posixpath.join(remote_dir, subdir, filename)) | 426 path=posixpath.join(remote_dir, subdir, filename)) |
388 | 427 |
389 # Confirm that we deleted all the files we uploaded to Google Storage. | 428 # Confirm that we deleted all the files we uploaded to Google Storage. |
390 (dirs, files) = gs.list_bucket_contents( | 429 (dirs, files) = gs.list_bucket_contents( |
391 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) | 430 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |
392 assert dirs == [], '%s == []' % dirs | 431 assert dirs == [], '%s == []' % dirs |
393 assert files == [], '%s == []' % files | 432 assert files == [], '%s == []' % files |
394 | 433 |
395 | 434 |
396 # TODO(epoger): How should we exercise this self-test? | 435 # TODO(epoger): How should we exercise these self-tests? |
397 # I avoided using the standard unittest framework, because these Google Storage | 436 # See http://skbug.com/2751 |
398 # operations are expensive and require .boto permissions. | |
399 # | |
400 # How can we automatically test this code without wasting too many resources | |
401 # or needing .boto permissions? | |
402 if __name__ == '__main__': | 437 if __name__ == '__main__': |
403 _run_self_test() | 438 _test_public_read() |
439 _test_authenticated_round_trip() | |
440 # TODO(epoger): Add _test_unauthenticated_access() to make sure we raise | |
441 # an exception when we try to access without needed credentials. | |
OLD | NEW |