OLD | NEW |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
4 """ | 4 """ |
5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
6 | 6 |
7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
8 found in the LICENSE file. | 8 found in the LICENSE file. |
9 | 9 |
10 Utilities for accessing Google Cloud Storage, using the boto library. | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
11 for the XML API). | |
11 | 12 |
12 See http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial-using-boto.html | 13 API/library references: |
13 for implementation tips. | 14 - https://developers.google.com/storage/docs/reference-guide |
15 - http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial-using-boto.html | |
14 """ | 16 """ |
15 # pylint: enable=C0301 | 17 # pylint: enable=C0301 |
16 | 18 |
17 # System-level imports | 19 # System-level imports |
18 import errno | 20 import errno |
19 import os | 21 import os |
20 import posixpath | 22 import posixpath |
21 import random | 23 import random |
22 import re | 24 import re |
23 import shutil | 25 import shutil |
24 import sys | 26 import sys |
25 import tempfile | 27 import tempfile |
26 | 28 |
27 # Imports from third-party code | 29 # Imports from third-party code |
28 TRUNK_DIRECTORY = os.path.abspath(os.path.join( | 30 TRUNK_DIRECTORY = os.path.abspath(os.path.join( |
29 os.path.dirname(__file__), os.pardir, os.pardir)) | 31 os.path.dirname(__file__), os.pardir, os.pardir)) |
30 for import_subdir in ['boto']: | 32 for import_subdir in ['boto']: |
31 import_dirpath = os.path.join( | 33 import_dirpath = os.path.join( |
32 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) | 34 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) |
33 if import_dirpath not in sys.path: | 35 if import_dirpath not in sys.path: |
34 # We need to insert at the beginning of the path, to make sure that our | 36 # We need to insert at the beginning of the path, to make sure that our |
35 # imported versions are favored over others that might be in the path. | 37 # imported versions are favored over others that might be in the path. |
36 sys.path.insert(0, import_dirpath) | 38 sys.path.insert(0, import_dirpath) |
39 from boto.gs import acl | |
37 from boto.gs.connection import GSConnection | 40 from boto.gs.connection import GSConnection |
38 from boto.gs.key import Key | 41 from boto.gs.key import Key |
39 from boto.s3.bucketlistresultset import BucketListResultSet | 42 from boto.s3.bucketlistresultset import BucketListResultSet |
40 from boto.s3.prefix import Prefix | 43 from boto.s3.prefix import Prefix |
41 | 44 |
45 # SupportedPermissions as listed in | |
46 # https://github.com/boto/boto/blob/develop/boto/gs/acl.py | |
47 PERMISSION_OWNER = 'FULL_CONTROL' | |
48 PERMISSION_READ = 'READ' | |
49 PERMISSION_WRITE = 'WRITE' | |
50 | |
51 # Types of identifiers we can use to set ACLs. | |
52 ID_TYPE_GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN | |
53 ID_TYPE_GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL | |
54 ID_TYPE_GROUP_BY_ID = acl.GROUP_BY_ID | |
55 ID_TYPE_USER_BY_EMAIL = acl.USER_BY_EMAIL | |
56 ID_TYPE_USER_BY_ID = acl.USER_BY_ID | |
57 | |
58 # Which field we get/set in ACL entries, depending on ID_TYPE. | |
59 FIELD_BY_ID_TYPE = { | |
60 ID_TYPE_GROUP_BY_DOMAIN: 'domain', | |
61 ID_TYPE_GROUP_BY_EMAIL: 'email_address', | |
62 ID_TYPE_GROUP_BY_ID: 'id', | |
63 ID_TYPE_USER_BY_EMAIL: 'email_address', | |
64 ID_TYPE_USER_BY_ID: 'id', | |
65 } | |
66 | |
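Editor's note: to make the mapping concrete, here is a minimal sketch (not part of the change under review) of how an id_type and the FIELD_BY_ID_TYPE table combine into a boto ACL entry. It mirrors what add_acl() below does, and assumes the boto import block above has already run; 'example.com' is a placeholder value.

  # Sketch: build a boto ACL entry granting READ to everyone in a domain.
  id_type = ID_TYPE_GROUP_BY_DOMAIN
  field = FIELD_BY_ID_TYPE[id_type]            # -> 'domain'
  args = {'type': id_type, 'permission': PERMISSION_READ}
  args[field] = 'example.com'                  # placeholder domain
  entry = acl.Entry(**args)                    # boto.gs.acl.Entry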
42 | 67 |
43 class GSUtils(object): | 68 class GSUtils(object): |
44 """Utilities for accessing Google Cloud Storage, using the boto library.""" | 69 """Utilities for accessing Google Cloud Storage, using the boto library.""" |
45 | 70 |
46 def __init__(self, boto_file_path=os.path.join('~','.boto')): | 71 def __init__(self, boto_file_path=os.path.join('~','.boto')): |
47 """Constructor. | 72 """Constructor. |
48 | 73 |
49 Params: | 74 Params: |
50 boto_file_path: full path (local-OS-style) on local disk where .boto | 75 boto_file_path: full path (local-OS-style) on local disk where .boto |
51 credentials file can be found. An exception is thrown if this file | 76 credentials file can be found. An exception is thrown if this file |
(...skipping 54 matching lines...)
106 """ | 131 """ |
107 conn = self._create_connection() | 132 conn = self._create_connection() |
108 b = conn.get_bucket(bucket_name=source_bucket) | 133 b = conn.get_bucket(bucket_name=source_bucket) |
109 item = Key(b) | 134 item = Key(b) |
110 item.key = source_path | 135 item.key = source_path |
111 if create_subdirs_if_needed: | 136 if create_subdirs_if_needed: |
112 _makedirs_if_needed(os.path.dirname(dest_path)) | 137 _makedirs_if_needed(os.path.dirname(dest_path)) |
113 with open(dest_path, 'w') as f: | 138 with open(dest_path, 'w') as f: |
114 item.get_contents_to_file(fp=f) | 139 item.get_contents_to_file(fp=f) |
115 | 140 |
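Editor's note: for orientation, a hedged usage sketch of download_file(). The import path, bucket, and file names below are illustrative assumptions, not taken from this change.

  from gs_utils import GSUtils  # hypothetical import path for this module

  gs = GSUtils()  # reads credentials from ~/.boto by default
  gs.download_file(source_bucket='my-bucket',            # placeholder bucket
                   source_path='remote/dir/file.txt',    # Posix-style GS path
                   dest_path='/tmp/dest/file.txt',       # local-OS-style path
                   create_subdirs_if_needed=True)        # creates /tmp/dest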
141 def add_acl(self, bucket, path, id_type, id_value, permission): | |
142 """Add access permissions on a single file in Google Storage. | |
143 | |
144 After this call, the set of users with access rights will be a superset of |
145 the set of users with access rights before the call, because the permissions | |
146 are additive. | |
147 (E.g., if you add READ permission for a group, but a member of that group | |
148 already has WRITE permission, that member will still have WRITE permission.) | |
149 TODO(epoger): Do we know that for sure? I *think* that's how it works... | |
150 | |
151 If there is already a permission set for this id_type/id_value combination, | |
152 this call will overwrite it. | |
153 | |
154 Params: | |
155 bucket: GS bucket | |
156 path: full path (Posix-style) to the file within that bucket | |
157 id_type: must be one of the ID_TYPE_* constants defined above | |
158 id_value: add permission for users whose id_type field contains this value | |
159 permission: permission to add for users matching id_type/id_value; | |
160 must be one of the PERMISSION_* constants defined above | |
161 """ | |
162 field = FIELD_BY_ID_TYPE[id_type] | |
163 conn = self._create_connection() | |
164 b = conn.get_bucket(bucket_name=bucket) | |
165 acls = b.get_acl(key_name=path) | |
166 | |
167 # Remove any existing entries that refer to the same id_type/id_value, | |
168 # because the API will fail if we try to set more than one. | |
169 matching_entries = [entry for entry in acls.entries.entry_list | |
170 if (entry.scope.type == id_type) and | |
171 (getattr(entry.scope, field) == id_value)] | |
172 if matching_entries: | |
173 for entry in matching_entries: | |
rmistry 2014/07/15 11:26:10: Is it possible to ever get a list of more than one matching entry here?
epoger 2014/07/15 13:20:18: AFAICT we should always get either 0 or 1 matching entries.
174 acls.entries.entry_list.remove(entry) | |
175 | |
176 # Add a new entry to the ACLs. | |
177 args = {'type': id_type, 'permission': permission} | |
178 args[field] = id_value | |
179 new_entry = acl.Entry(**args) | |
180 acls.entries.entry_list.append(new_entry) | |
181 b.set_acl(acl_or_str=acls, key_name=path) | |
182 | |
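Editor's note: a hedged usage sketch of add_acl(), continuing the gs = GSUtils() instance from the download_file() sketch above; bucket and domain values are placeholders. The self-test at the bottom of the file exercises the same call.

  # Grant READ access on one file to everyone in a Google Apps domain.
  # Because any existing entry for this id_type/id_value is removed first,
  # calling this twice with different permissions leaves only the last one.
  gs.add_acl(bucket='my-bucket',                # placeholder bucket
             path='remote/dir/file.txt',
             id_type=ID_TYPE_GROUP_BY_DOMAIN,
             id_value='example.com',            # placeholder domain
             permission=PERMISSION_READ)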
183 def delete_acl(self, bucket, path, id_type, id_value): | |
184 """Delete certain access permissions on a single file in Google Storage. | |
185 | |
186 Various users who match this id_type/id_value pair may still have access | |
187 rights to this file after this call, if they have been granted those rights | |
188 based on *other* id_types (e.g., perhaps they still have individual user | |
189 access rights, even if their group access rights are removed). | |
190 | |
191 If no permissions have been added for this id_type/id_value, this will | |
192 return uneventfully (there will be no exception or other indication of | |
193 failure). | |
194 | |
195 Params: | |
196 bucket: GS bucket | |
197 path: full path (Posix-style) to the file within that bucket | |
198 id_type: must be one of the ID_TYPE_* constants defined above | |
199 id_value: delete permissions for users whose id_type field contains this | |
200 value | |
201 """ | |
202 field = FIELD_BY_ID_TYPE[id_type] | |
203 conn = self._create_connection() | |
204 b = conn.get_bucket(bucket_name=bucket) | |
205 acls = b.get_acl(key_name=path) | |
206 matching_entries = [entry for entry in acls.entries.entry_list | |
207 if (entry.scope.type == id_type) and | |
208 (getattr(entry.scope, field) == id_value)] | |
209 if matching_entries: | |
210 for entry in matching_entries: | |
211 acls.entries.entry_list.remove(entry) | |
212 b.set_acl(acl_or_str=acls, key_name=path) | |
213 | |
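Editor's note: correspondingly, a sketch of revoking that grant (same placeholder values as above).

  # Remove whatever permission was set for this exact id_type/id_value.
  # A no-op if no such entry exists; other grants on the file are untouched.
  gs.delete_acl(bucket='my-bucket',
                path='remote/dir/file.txt',
                id_type=ID_TYPE_GROUP_BY_DOMAIN,
                id_value='example.com')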
214 def get_acl(self, bucket, path, id_type, id_value): | |
215 """Retrieve partial access permissions on a single file in Google Storage. | |
216 | |
217 Various users who match this id_type/id_value pair may have access rights | |
218 other than that returned by this call, if they have been granted those | |
219 rights based on *other* id_types (e.g., perhaps they have group access | |
220 rights, beyond their individual access rights). | |
221 | |
222 Params: | |
223 bucket: GS bucket | |
224 path: full path (Posix-style) to the file within that bucket | |
225 id_type: must be one of the ID_TYPE_* constants defined above | |
226 id_value: delete permissions for users whose id_type field contains this | |
epoger 2014/07/15 13:34:26: delete -> get
227 value | |
228 | |
epoger 2014/07/15 13:34:26: Add an example
229 Returns: the PERMISSION_* constant which has been set for users matching | |
230 this id_type/id_value, on this file; or None if no such permissions have | |
231 been set. | |
rmistry 2014/07/15 11:26:10: [Optional] How about creating a PERMISSION_NONE or ...
epoger 2014/07/15 13:20:18: I can do that, *if* you think that would also make ...
232 """ | |
233 field = FIELD_BY_ID_TYPE[id_type] | |
234 conn = self._create_connection() | |
235 b = conn.get_bucket(bucket_name=bucket) | |
236 acls = b.get_acl(key_name=path) | |
237 matching_entries = [entry for entry in acls.entries.entry_list | |
238 if (entry.scope.type == id_type) and | |
239 (getattr(entry.scope, field) == id_value)] | |
240 if matching_entries: | |
241 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) | |
242 return matching_entries[0].permission | |
243 else: | |
244 return None | |
245 | |
116 def list_bucket_contents(self, bucket, subdir=None): | 246 def list_bucket_contents(self, bucket, subdir=None): |
117 """Returns files in the Google Storage bucket as a (dirs, files) tuple. | 247 """Returns files in the Google Storage bucket as a (dirs, files) tuple. |
118 | 248 |
119 Args: | 249 Args: |
120 bucket: name of the Google Storage bucket | 250 bucket: name of the Google Storage bucket |
121 subdir: directory within the bucket to list, or None for root directory | 251 subdir: directory within the bucket to list, or None for root directory |
122 """ | 252 """ |
123 # The GS command relies on the prefix (if any) ending with a slash. | 253 # The GS command relies on the prefix (if any) ending with a slash. |
124 prefix = subdir or '' | 254 prefix = subdir or '' |
125 if prefix and not prefix.endswith('/'): | 255 if prefix and not prefix.endswith('/'): |
(...skipping 81 matching lines...)
207 # Get a list of the files we uploaded to Google Storage. | 337 # Get a list of the files we uploaded to Google Storage. |
208 (dirs, files) = gs.list_bucket_contents( | 338 (dirs, files) = gs.list_bucket_contents( |
209 bucket=bucket, subdir=remote_dir) | 339 bucket=bucket, subdir=remote_dir) |
210 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir) | 340 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir) |
211 assert files == [], '%s == []' % files | 341 assert files == [], '%s == []' % files |
212 (dirs, files) = gs.list_bucket_contents( | 342 (dirs, files) = gs.list_bucket_contents( |
213 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) | 343 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |
214 assert dirs == [], '%s == []' % dirs | 344 assert dirs == [], '%s == []' % dirs |
215 assert files == filenames_to_upload, '%s == %s' % (files, filenames_to_upload) | 345 assert files == filenames_to_upload, '%s == %s' % (files, filenames_to_upload) |
216 | 346 |
347 # Manipulate ACLs on one of those files, and verify them. | |
348 # TODO(epoger): Test id_types other than ID_TYPE_GROUP_BY_DOMAIN ? | |
349 # TODO(epoger): Test setting multiple ACLs on the same file? | |
350 id_type = ID_TYPE_GROUP_BY_DOMAIN | |
351 id_value = 'google.com' | |
352 fullpath = posixpath.join(remote_dir, subdir, filenames_to_upload[0]) | |
353 # Make sure ACL is empty to start with ... | |
354 gs.delete_acl(bucket=bucket, path=fullpath, | |
355 id_type=id_type, id_value=id_value) | |
356 permission = gs.get_acl(bucket=bucket, path=fullpath, | |
357 id_type=id_type, id_value=id_value) | |
358 assert permission == None, '%s == None' % permission | |
359 # ... set it to OWNER ... | |
360 gs.add_acl(bucket=bucket, path=fullpath, | |
361 id_type=id_type, id_value=id_value, permission=PERMISSION_OWNER) | |
362 permission = gs.get_acl(bucket=bucket, path=fullpath, | |
363 id_type=id_type, id_value=id_value) | |
364 assert permission == PERMISSION_OWNER, '%s == %s' % ( | |
365 permission, PERMISSION_OWNER) | |
366 # ... now set it to READ ... | |
367 gs.add_acl(bucket=bucket, path=fullpath, | |
368 id_type=id_type, id_value=id_value, permission=PERMISSION_READ) | |
369 permission = gs.get_acl(bucket=bucket, path=fullpath, | |
370 id_type=id_type, id_value=id_value) | |
371 assert permission == PERMISSION_READ, '%s == %s' % ( | |
372 permission, PERMISSION_READ) | |
373 # ... and clear it again to finish. | |
374 gs.delete_acl(bucket=bucket, path=fullpath, | |
375 id_type=id_type, id_value=id_value) | |
376 permission = gs.get_acl(bucket=bucket, path=fullpath, | |
377 id_type=id_type, id_value=id_value) | |
378 assert permission == None, '%s == None' % permission | |
379 | |
217 # Download the files we uploaded to Google Storage, and validate contents. | 380 # Download the files we uploaded to Google Storage, and validate contents. |
218 local_dest_dir = tempfile.mkdtemp() | 381 local_dest_dir = tempfile.mkdtemp() |
219 try: | 382 try: |
220 for filename in filenames_to_upload: | 383 for filename in filenames_to_upload: |
221 gs.download_file(source_bucket=bucket, | 384 gs.download_file(source_bucket=bucket, |
222 source_path=posixpath.join(remote_dir, subdir, filename), | 385 source_path=posixpath.join(remote_dir, subdir, filename), |
223 dest_path=os.path.join(local_dest_dir, subdir, filename), | 386 dest_path=os.path.join(local_dest_dir, subdir, filename), |
224 create_subdirs_if_needed=True) | 387 create_subdirs_if_needed=True) |
225 with open(os.path.join(local_dest_dir, subdir, filename)) as f: | 388 with open(os.path.join(local_dest_dir, subdir, filename)) as f: |
226 file_contents = f.read() | 389 file_contents = f.read() |
(...skipping 15 matching lines...)
242 | 405 |
243 | 406 |
244 # TODO(epoger): How should we exercise this self-test? | 407 # TODO(epoger): How should we exercise this self-test? |
245 # I avoided using the standard unittest framework, because these Google Storage | 408 # I avoided using the standard unittest framework, because these Google Storage |
246 # operations are expensive and require .boto permissions. | 409 # operations are expensive and require .boto permissions. |
247 # | 410 # |
248 # How can we automatically test this code without wasting too many resources | 411 # How can we automatically test this code without wasting too many resources |
249 # or needing .boto permissions? | 412 # or needing .boto permissions? |
250 if __name__ == '__main__': | 413 if __name__ == '__main__': |
251 _run_self_test() | 414 _run_self_test() |
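Editor's note: one possible answer to the TODO above, purely as a sketch and not part of this change: skip the expensive, credentialed self-test unless a .boto file actually exists, so the module can still be exercised cheaply elsewhere.

  # Sketch: only run the self-test when ~/.boto credentials are present.
  #
  # if __name__ == '__main__':
  #   if os.path.isfile(os.path.expanduser(os.path.join('~', '.boto'))):
  #     _run_self_test()
  #   else:
  #     print 'Skipping self-test: no ~/.boto credentials found.'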