OLD | NEW |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
4 """ | 4 """ |
5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
6 | 6 |
7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
8 found in the LICENSE file. | 8 found in the LICENSE file. |
9 | 9 |
10 Utilities for accessing Google Cloud Storage, using the boto library. | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
11 for the XML API). | |
11 | 12 |
12 See http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial-using-boto.html | 13 API/library references: |
13 for implementation tips. | 14 - https://developers.google.com/storage/docs/reference-guide |
15 - http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial-using-boto.html | |
14 """ | 16 """ |
15 # pylint: enable=C0301 | 17 # pylint: enable=C0301 |
16 | 18 |
17 # System-level imports | 19 # System-level imports |
18 import errno | 20 import errno |
19 import os | 21 import os |
20 import posixpath | 22 import posixpath |
21 import random | 23 import random |
22 import re | 24 import re |
23 import shutil | 25 import shutil |
24 import sys | 26 import sys |
25 import tempfile | 27 import tempfile |
26 | 28 |
27 # Imports from third-party code | 29 # Imports from third-party code |
28 TRUNK_DIRECTORY = os.path.abspath(os.path.join( | 30 TRUNK_DIRECTORY = os.path.abspath(os.path.join( |
29 os.path.dirname(__file__), os.pardir, os.pardir)) | 31 os.path.dirname(__file__), os.pardir, os.pardir)) |
30 for import_subdir in ['boto']: | 32 for import_subdir in ['boto']: |
31 import_dirpath = os.path.join( | 33 import_dirpath = os.path.join( |
32 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) | 34 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) |
33 if import_dirpath not in sys.path: | 35 if import_dirpath not in sys.path: |
34 # We need to insert at the beginning of the path, to make sure that our | 36 # We need to insert at the beginning of the path, to make sure that our |
35 # imported versions are favored over others that might be in the path. | 37 # imported versions are favored over others that might be in the path. |
36 sys.path.insert(0, import_dirpath) | 38 sys.path.insert(0, import_dirpath) |
39 from boto.gs import acl | |
37 from boto.gs.connection import GSConnection | 40 from boto.gs.connection import GSConnection |
38 from boto.gs.key import Key | 41 from boto.gs.key import Key |
39 from boto.s3.bucketlistresultset import BucketListResultSet | 42 from boto.s3.bucketlistresultset import BucketListResultSet |
40 from boto.s3.prefix import Prefix | 43 from boto.s3.prefix import Prefix |
41 | 44 |
45 # Permissions that may be set on each file in Google Storage. | |
46 # See SupportedPermissions in | |
47 # https://github.com/boto/boto/blob/develop/boto/gs/acl.py | |
48 PERMISSION_NONE = None | |
49 PERMISSION_OWNER = 'FULL_CONTROL' | |
50 PERMISSION_READ = 'READ' | |
51 PERMISSION_WRITE = 'WRITE' | |
52 | |
53 # Types of identifiers we can use to set ACLs. | |
54 ID_TYPE_GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN | |
55 ID_TYPE_GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL | |
56 ID_TYPE_GROUP_BY_ID = acl.GROUP_BY_ID | |
57 ID_TYPE_USER_BY_EMAIL = acl.USER_BY_EMAIL | |
58 ID_TYPE_USER_BY_ID = acl.USER_BY_ID | |
59 | |
60 # Which field we get/set in ACL entries, depending on ID_TYPE. | |
61 FIELD_BY_ID_TYPE = { | |
62 ID_TYPE_GROUP_BY_DOMAIN: 'domain', | |
63 ID_TYPE_GROUP_BY_EMAIL: 'email_address', | |
64 ID_TYPE_GROUP_BY_ID: 'id', | |
65 ID_TYPE_USER_BY_EMAIL: 'email_address', | |
66 ID_TYPE_USER_BY_ID: 'id', | |
67 } | |
68 | |
42 | 69 |
43 class GSUtils(object): | 70 class GSUtils(object): |
44 """Utilities for accessing Google Cloud Storage, using the boto library.""" | 71 """Utilities for accessing Google Cloud Storage, using the boto library.""" |
45 | 72 |
46 def __init__(self, boto_file_path=os.path.join('~','.boto')): | 73 def __init__(self, boto_file_path=os.path.join('~','.boto')): |
47 """Constructor. | 74 """Constructor. |
48 | 75 |
49 Params: | 76 Params: |
50 boto_file_path: full path (local-OS-style) on local disk where .boto | 77 boto_file_path: full path (local-OS-style) on local disk where .boto |
51 credentials file can be found. An exception is thrown if this file | 78 credentials file can be found. An exception is thrown if this file |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
106 """ | 133 """ |
107 conn = self._create_connection() | 134 conn = self._create_connection() |
108 b = conn.get_bucket(bucket_name=source_bucket) | 135 b = conn.get_bucket(bucket_name=source_bucket) |
109 item = Key(b) | 136 item = Key(b) |
110 item.key = source_path | 137 item.key = source_path |
111 if create_subdirs_if_needed: | 138 if create_subdirs_if_needed: |
112 _makedirs_if_needed(os.path.dirname(dest_path)) | 139 _makedirs_if_needed(os.path.dirname(dest_path)) |
113 with open(dest_path, 'w') as f: | 140 with open(dest_path, 'w') as f: |
114 item.get_contents_to_file(fp=f) | 141 item.get_contents_to_file(fp=f) |
115 | 142 |
143 def get_acl(self, bucket, path, id_type, id_value): | |
144 """Retrieve partial access permissions on a single file in Google Storage. | |
145 | |
146 Various users who match this id_type/id_value pair may have access rights | |
147 other than that returned by this call, if they have been granted those | |
148 rights based on *other* id_types (e.g., perhaps they have group access | |
149 rights, beyond their individual access rights). | |
150 | |
151 Params: | |
152 bucket: GS bucket | |
153 path: full path (Posix-style) to the file within that bucket | |
154 id_type: must be one of the ID_TYPE_* constants defined above | |
155 id_value: get permissions for users whose id_type field contains this | |
156 value | |
157 | |
158 Returns: the PERMISSION_* constant which has been set for users matching | |
159 this id_type/id_value, on this file; or PERMISSION_NONE if no such | |
160 permissions have been set. | |
161 """ | |
162 field = FIELD_BY_ID_TYPE[id_type] | |
163 conn = self._create_connection() | |
164 b = conn.get_bucket(bucket_name=bucket) | |
165 acls = b.get_acl(key_name=path) | |
166 matching_entries = [entry for entry in acls.entries.entry_list | |
167 if (entry.scope.type == id_type) and | |
168 (getattr(entry.scope, field) == id_value)] | |
169 if matching_entries: | |
170 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) | |
171 return matching_entries[0].permission | |
172 else: | |
173 return PERMISSION_NONE | |
174 | |
175 def set_acl(self, bucket, path, id_type, id_value, permission): | |
176 """Set partial access permissions on a single file in Google Storage. | |
177 | |
178 Note that a single set_acl() call will not guarantee what access rights any | |
179 given user will have on a given file, because permissions are additive. | |
180 (E.g., if you set READ permission for a group, but a member of that group | |
181 already has WRITE permission, that member will still have WRITE permission.) | |
182 TODO(epoger): Do we know that for sure? I *think* that's how it works... | |
183 | |
184 If there is already a permission set on this file for this id_type/id_value | |
185 combination, this call will overwrite it. | |
186 | |
187 Params: | |
188 bucket: GS bucket | |
189 path: full path (Posix-style) to the file within that bucket | |
190 id_type: must be one of the ID_TYPE_* constants defined above | |
191 id_value: add permission for users whose id_type field contains this value | |
192 permission: permission to add for users matching id_type/id_value; | |
193 must be one of the PERMISSION_* constants defined above. | |
194 If PERMISSION_NONE, then any permissions will be granted to this | |
195 particular id_type/id_value will be removed... but, given that | |
196 permissions are additive, specific users may still have access rights | |
197 based on permissions given to *other* id_type/id_value pairs. | |
198 | |
199 Example Code: | |
200 bucket = 'gs://bucket-name' | |
201 path = 'path/to/file' | |
202 id_type = ID_TYPE_USER_BY_EMAIL | |
203 id_value = 'epoger@google.com' | |
204 set_acl(bucket, path, id_type, id_value, PERMISSION_READ) | |
205 assert PERMISSION_READ == get_acl(bucket, path, id_type, id_value) | |
206 set_acl(bucket, path, id_type, id_value, PERMISSION_WRITE) | |
207 assert PERMISSION_WRITE == get_acl(bucket, path, id_type, id_value) | |
208 """ | |
209 field = FIELD_BY_ID_TYPE[id_type] | |
epoger
2014/07/15 19:47:03
I can extract lines 209-218 into a separate functi
| |
210 conn = self._create_connection() | |
211 b = conn.get_bucket(bucket_name=bucket) | |
212 acls = b.get_acl(key_name=path) | |
213 | |
214 # Remove any existing entries that refer to the same id_type/id_value, | |
215 # because the API will fail if we try to set more than one. | |
216 matching_entries = [entry for entry in acls.entries.entry_list | |
217 if (entry.scope.type == id_type) and | |
218 (getattr(entry.scope, field) == id_value)] | |
219 if matching_entries: | |
220 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) | |
221 acls.entries.entry_list.remove(matching_entries[0]) | |
222 | |
223 # Add a new entry to the ACLs. | |
224 if permission != PERMISSION_NONE: | |
225 args = {'type': id_type, 'permission': permission} | |
226 args[field] = id_value | |
227 new_entry = acl.Entry(**args) | |
228 acls.entries.entry_list.append(new_entry) | |
229 | |
230 # Finally, write back the modified ACLs. | |
231 b.set_acl(acl_or_str=acls, key_name=path) | |
232 | |
116 def list_bucket_contents(self, bucket, subdir=None): | 233 def list_bucket_contents(self, bucket, subdir=None): |
117 """Returns files in the Google Storage bucket as a (dirs, files) tuple. | 234 """Returns files in the Google Storage bucket as a (dirs, files) tuple. |
118 | 235 |
119 Args: | 236 Args: |
120 bucket: name of the Google Storage bucket | 237 bucket: name of the Google Storage bucket |
121 subdir: directory within the bucket to list, or None for root directory | 238 subdir: directory within the bucket to list, or None for root directory |
122 """ | 239 """ |
123 # The GS command relies on the prefix (if any) ending with a slash. | 240 # The GS command relies on the prefix (if any) ending with a slash. |
124 prefix = subdir or '' | 241 prefix = subdir or '' |
125 if prefix and not prefix.endswith('/'): | 242 if prefix and not prefix.endswith('/'): |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
207 # Get a list of the files we uploaded to Google Storage. | 324 # Get a list of the files we uploaded to Google Storage. |
208 (dirs, files) = gs.list_bucket_contents( | 325 (dirs, files) = gs.list_bucket_contents( |
209 bucket=bucket, subdir=remote_dir) | 326 bucket=bucket, subdir=remote_dir) |
210 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir) | 327 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir) |
211 assert files == [], '%s == []' % files | 328 assert files == [], '%s == []' % files |
212 (dirs, files) = gs.list_bucket_contents( | 329 (dirs, files) = gs.list_bucket_contents( |
213 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) | 330 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |
214 assert dirs == [], '%s == []' % dirs | 331 assert dirs == [], '%s == []' % dirs |
215 assert files == filenames_to_upload, '%s == %s' % (files, filenames_to_upload) | 332 assert files == filenames_to_upload, '%s == %s' % (files, filenames_to_upload) |
216 | 333 |
334 # Manipulate ACLs on one of those files, and verify them. | |
335 # TODO(epoger): Test id_types other than ID_TYPE_GROUP_BY_DOMAIN ? | |
336 # TODO(epoger): Test setting multiple ACLs on the same file? | |
337 id_type = ID_TYPE_GROUP_BY_DOMAIN | |
338 id_value = 'google.com' | |
339 fullpath = posixpath.join(remote_dir, subdir, filenames_to_upload[0]) | |
340 # Make sure ACL is empty to start with ... | |
341 gs.set_acl(bucket=bucket, path=fullpath, | |
342 id_type=id_type, id_value=id_value, permission=PERMISSION_NONE) | |
343 permission = gs.get_acl(bucket=bucket, path=fullpath, | |
344 id_type=id_type, id_value=id_value) | |
345 assert permission == PERMISSION_NONE, '%s == %s' % ( | |
346 permission, PERMISSION_NONE) | |
347 # ... set it to OWNER ... | |
348 gs.set_acl(bucket=bucket, path=fullpath, | |
349 id_type=id_type, id_value=id_value, permission=PERMISSION_OWNER) | |
350 permission = gs.get_acl(bucket=bucket, path=fullpath, | |
351 id_type=id_type, id_value=id_value) | |
352 assert permission == PERMISSION_OWNER, '%s == %s' % ( | |
353 permission, PERMISSION_OWNER) | |
354 # ... now set it to READ ... | |
355 gs.set_acl(bucket=bucket, path=fullpath, | |
356 id_type=id_type, id_value=id_value, permission=PERMISSION_READ) | |
357 permission = gs.get_acl(bucket=bucket, path=fullpath, | |
358 id_type=id_type, id_value=id_value) | |
359 assert permission == PERMISSION_READ, '%s == %s' % ( | |
360 permission, PERMISSION_READ) | |
361 # ... and clear it again to finish. | |
362 gs.set_acl(bucket=bucket, path=fullpath, | |
363 id_type=id_type, id_value=id_value, permission=PERMISSION_NONE) | |
364 permission = gs.get_acl(bucket=bucket, path=fullpath, | |
365 id_type=id_type, id_value=id_value) | |
366 assert permission == PERMISSION_NONE, '%s == %s' % ( | |
367 permission, PERMISSION_NONE) | |
368 | |
217 # Download the files we uploaded to Google Storage, and validate contents. | 369 # Download the files we uploaded to Google Storage, and validate contents. |
218 local_dest_dir = tempfile.mkdtemp() | 370 local_dest_dir = tempfile.mkdtemp() |
219 try: | 371 try: |
220 for filename in filenames_to_upload: | 372 for filename in filenames_to_upload: |
221 gs.download_file(source_bucket=bucket, | 373 gs.download_file(source_bucket=bucket, |
222 source_path=posixpath.join(remote_dir, subdir, filename), | 374 source_path=posixpath.join(remote_dir, subdir, filename), |
223 dest_path=os.path.join(local_dest_dir, subdir, filename), | 375 dest_path=os.path.join(local_dest_dir, subdir, filename), |
224 create_subdirs_if_needed=True) | 376 create_subdirs_if_needed=True) |
225 with open(os.path.join(local_dest_dir, subdir, filename)) as f: | 377 with open(os.path.join(local_dest_dir, subdir, filename)) as f: |
226 file_contents = f.read() | 378 file_contents = f.read() |
(...skipping 15 matching lines...) Expand all Loading... | |
242 | 394 |
243 | 395 |
244 # TODO(epoger): How should we exercise this self-test? | 396 # TODO(epoger): How should we exercise this self-test? |
245 # I avoided using the standard unittest framework, because these Google Storage | 397 # I avoided using the standard unittest framework, because these Google Storage |
246 # operations are expensive and require .boto permissions. | 398 # operations are expensive and require .boto permissions. |
247 # | 399 # |
248 # How can we automatically test this code without wasting too many resources | 400 # How can we automatically test this code without wasting too many resources |
249 # or needing .boto permissions? | 401 # or needing .boto permissions? |
250 if __name__ == '__main__': | 402 if __name__ == '__main__': |
251 _run_self_test() | 403 _run_self_test() |
OLD | NEW |