Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1182)

Side by Side Diff: py/utils/gs_utils.py

Issue 407533002: add upload_dir_contents() and download_dir_contents() to gs_utils.py (Closed) Base URL: https://skia.googlesource.com/common.git@master
Patch Set: update default predefined_acl Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/python 1 #!/usr/bin/python
2 2
3 # pylint: disable=C0301 3 # pylint: disable=C0301
4 """ 4 """
5 Copyright 2014 Google Inc. 5 Copyright 2014 Google Inc.
6 6
7 Use of this source code is governed by a BSD-style license that can be 7 Use of this source code is governed by a BSD-style license that can be
8 found in the LICENSE file. 8 found in the LICENSE file.
9 9
10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper
(...skipping 18 matching lines...) Expand all
29 # Imports from third-party code 29 # Imports from third-party code
30 TRUNK_DIRECTORY = os.path.abspath(os.path.join( 30 TRUNK_DIRECTORY = os.path.abspath(os.path.join(
31 os.path.dirname(__file__), os.pardir, os.pardir)) 31 os.path.dirname(__file__), os.pardir, os.pardir))
32 for import_subdir in ['boto']: 32 for import_subdir in ['boto']:
33 import_dirpath = os.path.join( 33 import_dirpath = os.path.join(
34 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) 34 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir)
35 if import_dirpath not in sys.path: 35 if import_dirpath not in sys.path:
36 # We need to insert at the beginning of the path, to make sure that our 36 # We need to insert at the beginning of the path, to make sure that our
37 # imported versions are favored over others that might be in the path. 37 # imported versions are favored over others that might be in the path.
38 sys.path.insert(0, import_dirpath) 38 sys.path.insert(0, import_dirpath)
39 from boto.exception import BotoServerError
39 from boto.gs import acl 40 from boto.gs import acl
40 from boto.gs.bucket import Bucket 41 from boto.gs.bucket import Bucket
41 from boto.gs.connection import GSConnection 42 from boto.gs.connection import GSConnection
42 from boto.gs.key import Key 43 from boto.gs.key import Key
43 from boto.s3.bucketlistresultset import BucketListResultSet 44 from boto.s3.bucketlistresultset import BucketListResultSet
44 from boto.s3.connection import SubdomainCallingFormat 45 from boto.s3.connection import SubdomainCallingFormat
45 from boto.s3.prefix import Prefix 46 from boto.s3.prefix import Prefix
46 47
47 # Permissions that may be set on each file in Google Storage. 48 # Predefined (aka "canned") ACLs that provide a "base coat" of permissions for
48 # See SupportedPermissions in 49 # each file in Google Storage. See CannedACLStrings in
49 # https://github.com/boto/boto/blob/develop/boto/gs/acl.py 50 # https://github.com/boto/boto/blob/develop/boto/gs/acl.py
51 # Also see https://developers.google.com/storage/docs/accesscontrol
52 PREDEFINED_ACL_AUTHENTICATED_READ = 'authenticated-read'
53 PREDEFINED_ACL_BUCKET_OWNER_FULL_CONTROL = 'bucket-owner-full-control'
54 PREDEFINED_ACL_BUCKET_OWNER_READ = 'bucket-owner-read'
55 PREDEFINED_ACL_PRIVATE = 'private'
56 PREDEFINED_ACL_PROJECT_PRIVATE = 'project-private'
57 PREDEFINED_ACL_PUBLIC_READ = 'public-read'
58 PREDEFINED_ACL_PUBLIC_READ_WRITE = 'public-read-write'
59
60 # "Fine-grained" permissions that may be set per user/group on each file in
61 # Google Storage. See SupportedPermissions in
62 # https://github.com/boto/boto/blob/develop/boto/gs/acl.py
63 # Also see https://developers.google.com/storage/docs/accesscontrol
50 PERMISSION_NONE = None 64 PERMISSION_NONE = None
51 PERMISSION_OWNER = 'FULL_CONTROL' 65 PERMISSION_OWNER = 'FULL_CONTROL'
52 PERMISSION_READ = 'READ' 66 PERMISSION_READ = 'READ'
53 PERMISSION_WRITE = 'WRITE' 67 PERMISSION_WRITE = 'WRITE'
54 68
55 # Types of identifiers we can use to set ACLs. 69 # Types of identifiers we can use to set "fine-grained" ACLs.
56 ID_TYPE_GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN 70 ID_TYPE_GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN
57 ID_TYPE_GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL 71 ID_TYPE_GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL
58 ID_TYPE_GROUP_BY_ID = acl.GROUP_BY_ID 72 ID_TYPE_GROUP_BY_ID = acl.GROUP_BY_ID
59 ID_TYPE_USER_BY_EMAIL = acl.USER_BY_EMAIL 73 ID_TYPE_USER_BY_EMAIL = acl.USER_BY_EMAIL
60 ID_TYPE_USER_BY_ID = acl.USER_BY_ID 74 ID_TYPE_USER_BY_ID = acl.USER_BY_ID
61 75
62 # Which field we get/set in ACL entries, depending on ID_TYPE. 76 # Which field we get/set in ACL entries, depending on ID_TYPE.
63 FIELD_BY_ID_TYPE = { 77 FIELD_BY_ID_TYPE = {
64 ID_TYPE_GROUP_BY_DOMAIN: 'domain', 78 ID_TYPE_GROUP_BY_DOMAIN: 'domain',
65 ID_TYPE_GROUP_BY_EMAIL: 'email_address', 79 ID_TYPE_GROUP_BY_EMAIL: 'email_address',
66 ID_TYPE_GROUP_BY_ID: 'id', 80 ID_TYPE_GROUP_BY_ID: 'id',
67 ID_TYPE_USER_BY_EMAIL: 'email_address', 81 ID_TYPE_USER_BY_EMAIL: 'email_address',
68 ID_TYPE_USER_BY_ID: 'id', 82 ID_TYPE_USER_BY_ID: 'id',
69 } 83 }
70 84
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
113 def delete_file(self, bucket, path): 127 def delete_file(self, bucket, path):
114 """Delete a single file within a GS bucket. 128 """Delete a single file within a GS bucket.
115 129
116 TODO(epoger): what if bucket or path does not exist? Should probably raise 130 TODO(epoger): what if bucket or path does not exist? Should probably raise
117 an exception. Implement, and add a test to exercise this. 131 an exception. Implement, and add a test to exercise this.
118 132
119 Params: 133 Params:
120 bucket: GS bucket to delete a file from 134 bucket: GS bucket to delete a file from
121 path: full path (Posix-style) of the file within the bucket to delete 135 path: full path (Posix-style) of the file within the bucket to delete
122 """ 136 """
123 conn = self._create_connection() 137 b = self._connect_to_bucket(bucket_name=bucket)
124 b = conn.get_bucket(bucket_name=bucket)
125 item = Key(b) 138 item = Key(b)
126 item.key = path 139 item.key = path
127 item.delete() 140 try:
141 item.delete()
142 except BotoServerError, e:
143 e.body = (repr(e.body) +
144 ' while deleting bucket=%s, path=%s' % (bucket, path))
145 raise
128 146
129 def upload_file(self, source_path, dest_bucket, dest_path): 147 def upload_file(self, source_path, dest_bucket, dest_path,
148 predefined_acl=None, fine_grained_acl_list=None):
130 """Upload contents of a local file to Google Storage. 149 """Upload contents of a local file to Google Storage.
131 150
132 TODO(epoger): Add the extra parameters provided by upload_file() within 151 TODO(epoger): Add the only_if_modified param provided by upload_file() in
133 https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts /utils/old_gs_utils.py , 152 https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts /utils/old_gs_utils.py ,
134 so we can replace that function with this one. 153 so we can replace that function with this one.
135 154
136 params: 155 params:
137 source_path: full path (local-OS-style) on local disk to read from 156 source_path: full path (local-OS-style) on local disk to read from
138 dest_bucket: GCS bucket to copy the file to 157 dest_bucket: GCS bucket to copy the file to
139 dest_path: full path (Posix-style) within that bucket 158 dest_path: full path (Posix-style) within that bucket
159 predefined_acl: which predefined ACL to apply to the file on Google
160 Storage; must be one of the PREDEFINED_ACL_* constants defined above.
161 If None, inherits dest_bucket's default object ACL.
162 TODO(epoger): add unittests for this param, although it seems to work
163 in my manual testing
164 fine_grained_acl_list: list of (id_type, id_value, permission) tuples
165 to apply to the uploaded file (on top of the predefined_acl),
166 or None if predefined_acl is sufficient
140 """ 167 """
141 conn = self._create_connection() 168 b = self._connect_to_bucket(bucket_name=dest_bucket)
142 b = conn.get_bucket(bucket_name=dest_bucket)
143 item = Key(b) 169 item = Key(b)
144 item.key = dest_path 170 item.key = dest_path
145 item.set_contents_from_filename(filename=source_path) 171 try:
172 item.set_contents_from_filename(filename=source_path,
173 policy=predefined_acl)
174 except BotoServerError, e:
175 e.body = (repr(e.body) +
176 ' while uploading source_path=%s to bucket=%s, path=%s' % (
177 source_path, dest_bucket, item.key))
178 raise
179 # TODO(epoger): This may be inefficient, because it calls
180 # _connect_to_bucket() again. Depending on how expensive that
181 # call is, we may want to optimize this.
182 for (id_type, id_value, permission) in fine_grained_acl_list or []:
183 self.set_acl(
184 bucket=dest_bucket, path=item.key,
185 id_type=id_type, id_value=id_value, permission=permission)
186
187 def upload_dir_contents(self, source_dir, dest_bucket, dest_dir,
188 predefined_acl=None, fine_grained_acl_list=None):
189 """Recursively upload contents of a local directory to Google Storage.
190
191 params:
192 source_dir: full path (local-OS-style) on local disk of directory to copy
193 contents of
194 dest_bucket: GCS bucket to copy the files into
195 dest_dir: full path (Posix-style) within that bucket; write the files into
196 this directory
197 predefined_acl: which predefined ACL to apply to the files on Google
198 Storage; must be one of the PREDEFINED_ACL_* constants defined above.
199 If None, inherits dest_bucket's default object ACL.
200 TODO(epoger): add unittests for this param, although it seems to work
201 in my manual testing
202 fine_grained_acl_list: list of (id_type, id_value, permission) tuples
203 to apply to every file uploaded (on top of the predefined_acl),
204 or None if predefined_acl is sufficient
205 TODO(epoger): add unittests for this param, although it seems to work
206 in my manual testing
207
208 The copy operates as a "merge with overwrite": any files in source_dir will
209 be "overlaid" on top of the existing content in dest_dir. Existing files
210 with the same names will be overwritten.
211
212 TODO(epoger): Upload multiple files simultaneously to reduce latency.
213
 214 TODO(epoger): Add a "noclobber" mode that will not upload any files that would
215 overwrite existing files in Google Storage.
216
217 TODO(epoger): Consider adding a do_compress parameter that would compress
218 the file using gzip before upload, and add a "Content-Encoding:gzip" header
219 so that HTTP downloads of the file would be unzipped automatically.
220 See https://developers.google.com/storage/docs/gsutil/addlhelp/
221 WorkingWithObjectMetadata#content-encoding
222 """
223 b = self._connect_to_bucket(bucket_name=dest_bucket)
224 for filename in sorted(os.listdir(source_dir)):
225 local_path = os.path.join(source_dir, filename)
226 if os.path.isdir(local_path):
227 self.upload_dir_contents( # recurse
228 source_dir=local_path, dest_bucket=dest_bucket,
229 dest_dir=posixpath.join(dest_dir, filename),
230 predefined_acl=predefined_acl)
231 else:
232 item = Key(b)
233 item.key = posixpath.join(dest_dir, filename)
234 try:
235 item.set_contents_from_filename(
236 filename=local_path, policy=predefined_acl)
237 except BotoServerError, e:
238 e.body = (repr(e.body) +
239 ' while uploading local_path=%s to bucket=%s, path=%s' % (
240 local_path, dest_bucket, item.key))
241 raise
242 # TODO(epoger): This may be inefficient, because it calls
243 # _connect_to_bucket() for every file. Depending on how expensive that
244 # call is, we may want to optimize this.
245 for (id_type, id_value, permission) in fine_grained_acl_list or []:
246 self.set_acl(
247 bucket=dest_bucket, path=item.key,
248 id_type=id_type, id_value=id_value, permission=permission)
146 249
147 def download_file(self, source_bucket, source_path, dest_path, 250 def download_file(self, source_bucket, source_path, dest_path,
148 create_subdirs_if_needed=False): 251 create_subdirs_if_needed=False):
149 """Downloads a single file from Google Cloud Storage to local disk. 252 """Downloads a single file from Google Cloud Storage to local disk.
150 253
151 Args: 254 Args:
152 source_bucket: GCS bucket to download the file from 255 source_bucket: GCS bucket to download the file from
153 source_path: full path (Posix-style) within that bucket 256 source_path: full path (Posix-style) within that bucket
154 dest_path: full path (local-OS-style) on local disk to copy the file to 257 dest_path: full path (local-OS-style) on local disk to copy the file to
155 create_subdirs_if_needed: boolean; whether to create subdirectories as 258 create_subdirs_if_needed: boolean; whether to create subdirectories as
156 needed to create dest_path 259 needed to create dest_path
157 """ 260 """
158 conn = self._create_connection() 261 b = self._connect_to_bucket(bucket_name=source_bucket)
159 b = conn.get_bucket(bucket_name=source_bucket)
160 item = Key(b) 262 item = Key(b)
161 item.key = source_path 263 item.key = source_path
162 if create_subdirs_if_needed: 264 if create_subdirs_if_needed:
163 _makedirs_if_needed(os.path.dirname(dest_path)) 265 _makedirs_if_needed(os.path.dirname(dest_path))
164 with open(dest_path, 'w') as f: 266 with open(dest_path, 'w') as f:
165 item.get_contents_to_file(fp=f) 267 try:
268 item.get_contents_to_file(fp=f)
269 except BotoServerError, e:
270 e.body = (repr(e.body) +
271 ' while downloading bucket=%s, path=%s to local_path=%s' % (
272 source_bucket, source_path, dest_path))
273 raise
274
275 def download_dir_contents(self, source_bucket, source_dir, dest_dir):
276 """Recursively download contents of a Google Storage directory to local disk
277
278 params:
279 source_bucket: GCS bucket to copy the files from
280 source_dir: full path (Posix-style) within that bucket; read the files
281 from this directory
282 dest_dir: full path (local-OS-style) on local disk of directory to copy
283 the files into
284
285 The copy operates as a "merge with overwrite": any files in source_dir will
286 be "overlaid" on top of the existing content in dest_dir. Existing files
287 with the same names will be overwritten.
288
289 TODO(epoger): Download multiple files simultaneously to reduce latency.
290 """
291 _makedirs_if_needed(dest_dir)
292 b = self._connect_to_bucket(bucket_name=source_bucket)
293 (dirs, files) = self.list_bucket_contents(
294 bucket=source_bucket, subdir=source_dir)
295
296 for filename in files:
297 item = Key(b)
298 item.key = posixpath.join(source_dir, filename)
299 dest_path = os.path.join(dest_dir, filename)
300 with open(dest_path, 'w') as f:
301 try:
302 item.get_contents_to_file(fp=f)
303 except BotoServerError, e:
304 e.body = (repr(e.body) +
305 ' while downloading bucket=%s, path=%s to local_path=%s' % (
306 source_bucket, item.key, dest_path))
307 raise
308
309 for dirname in dirs:
310 self.download_dir_contents( # recurse
311 source_bucket=source_bucket,
312 source_dir=posixpath.join(source_dir, dirname),
313 dest_dir=os.path.join(dest_dir, dirname))
166 314
167 def get_acl(self, bucket, path, id_type, id_value): 315 def get_acl(self, bucket, path, id_type, id_value):
168 """Retrieve partial access permissions on a single file in Google Storage. 316 """Retrieve partial access permissions on a single file in Google Storage.
169 317
170 Various users who match this id_type/id_value pair may have access rights 318 Various users who match this id_type/id_value pair may have access rights
171 other than that returned by this call, if they have been granted those 319 other than that returned by this call, if they have been granted those
172 rights based on *other* id_types (e.g., perhaps they have group access 320 rights based on *other* id_types (e.g., perhaps they have group access
173 rights, beyond their individual access rights). 321 rights, beyond their individual access rights).
174 322
323 TODO(epoger): What if the remote file does not exist? This should probably
324 raise an exception in that case.
325
175 Params: 326 Params:
176 bucket: GS bucket 327 bucket: GS bucket
177 path: full path (Posix-style) to the file within that bucket 328 path: full path (Posix-style) to the file within that bucket
178 id_type: must be one of the ID_TYPE_* constants defined above 329 id_type: must be one of the ID_TYPE_* constants defined above
179 id_value: get permissions for users whose id_type field contains this 330 id_value: get permissions for users whose id_type field contains this
180 value 331 value
181 332
182 Returns: the PERMISSION_* constant which has been set for users matching 333 Returns: the PERMISSION_* constant which has been set for users matching
183 this id_type/id_value, on this file; or PERMISSION_NONE if no such 334 this id_type/id_value, on this file; or PERMISSION_NONE if no such
184 permissions have been set. 335 permissions have been set.
185 """ 336 """
186 field = FIELD_BY_ID_TYPE[id_type] 337 field = FIELD_BY_ID_TYPE[id_type]
187 conn = self._create_connection() 338 b = self._connect_to_bucket(bucket_name=bucket)
188 b = conn.get_bucket(bucket_name=bucket)
189 acls = b.get_acl(key_name=path) 339 acls = b.get_acl(key_name=path)
190 matching_entries = [entry for entry in acls.entries.entry_list 340 matching_entries = [entry for entry in acls.entries.entry_list
191 if (entry.scope.type == id_type) and 341 if (entry.scope.type == id_type) and
192 (getattr(entry.scope, field) == id_value)] 342 (getattr(entry.scope, field) == id_value)]
193 if matching_entries: 343 if matching_entries:
194 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) 344 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries)
195 return matching_entries[0].permission 345 return matching_entries[0].permission
196 else: 346 else:
197 return PERMISSION_NONE 347 return PERMISSION_NONE
198 348
199 def set_acl(self, bucket, path, id_type, id_value, permission): 349 def set_acl(self, bucket, path, id_type, id_value, permission):
200 """Set partial access permissions on a single file in Google Storage. 350 """Set partial access permissions on a single file in Google Storage.
201 351
202 Note that a single set_acl() call will not guarantee what access rights any 352 Note that a single set_acl() call will not guarantee what access rights any
203 given user will have on a given file, because permissions are additive. 353 given user will have on a given file, because permissions are additive.
204 (E.g., if you set READ permission for a group, but a member of that group 354 (E.g., if you set READ permission for a group, but a member of that group
205 already has WRITE permission, that member will still have WRITE permission.) 355 already has WRITE permission, that member will still have WRITE permission.)
206 TODO(epoger): Do we know that for sure? I *think* that's how it works... 356 TODO(epoger): Do we know that for sure? I *think* that's how it works...
207 357
208 If there is already a permission set on this file for this id_type/id_value 358 If there is already a permission set on this file for this id_type/id_value
209 combination, this call will overwrite it. 359 combination, this call will overwrite it.
210 360
361 TODO(epoger): What if the remote file does not exist? This should probably
362 raise an exception in that case.
363
211 Params: 364 Params:
212 bucket: GS bucket 365 bucket: GS bucket
213 path: full path (Posix-style) to the file within that bucket 366 path: full path (Posix-style) to the file within that bucket
214 id_type: must be one of the ID_TYPE_* constants defined above 367 id_type: must be one of the ID_TYPE_* constants defined above
215 id_value: add permission for users whose id_type field contains this value 368 id_value: add permission for users whose id_type field contains this value
216 permission: permission to add for users matching id_type/id_value; 369 permission: permission to add for users matching id_type/id_value;
217 must be one of the PERMISSION_* constants defined above. 370 must be one of the PERMISSION_* constants defined above.
 218 If PERMISSION_NONE, then any permissions that have been granted to this 371 If PERMISSION_NONE, then any permissions that have been granted to this
219 particular id_type/id_value will be removed... but, given that 372 particular id_type/id_value will be removed... but, given that
220 permissions are additive, specific users may still have access rights 373 permissions are additive, specific users may still have access rights
221 based on permissions given to *other* id_type/id_value pairs. 374 based on permissions given to *other* id_type/id_value pairs.
222 375
223 Example Code: 376 Example Code:
 224 bucket = 'bucket-name' 377 bucket = 'bucket-name'
225 path = 'path/to/file' 378 path = 'path/to/file'
226 id_type = ID_TYPE_USER_BY_EMAIL 379 id_type = ID_TYPE_USER_BY_EMAIL
227 id_value = 'epoger@google.com' 380 id_value = 'epoger@google.com'
228 set_acl(bucket, path, id_type, id_value, PERMISSION_READ) 381 set_acl(bucket, path, id_type, id_value, PERMISSION_READ)
229 assert PERMISSION_READ == get_acl(bucket, path, id_type, id_value) 382 assert PERMISSION_READ == get_acl(bucket, path, id_type, id_value)
230 set_acl(bucket, path, id_type, id_value, PERMISSION_WRITE) 383 set_acl(bucket, path, id_type, id_value, PERMISSION_WRITE)
231 assert PERMISSION_WRITE == get_acl(bucket, path, id_type, id_value) 384 assert PERMISSION_WRITE == get_acl(bucket, path, id_type, id_value)
232 """ 385 """
233 field = FIELD_BY_ID_TYPE[id_type] 386 field = FIELD_BY_ID_TYPE[id_type]
234 conn = self._create_connection() 387 b = self._connect_to_bucket(bucket_name=bucket)
235 b = conn.get_bucket(bucket_name=bucket)
236 acls = b.get_acl(key_name=path) 388 acls = b.get_acl(key_name=path)
237 389
238 # Remove any existing entries that refer to the same id_type/id_value, 390 # Remove any existing entries that refer to the same id_type/id_value,
239 # because the API will fail if we try to set more than one. 391 # because the API will fail if we try to set more than one.
240 matching_entries = [entry for entry in acls.entries.entry_list 392 matching_entries = [entry for entry in acls.entries.entry_list
241 if (entry.scope.type == id_type) and 393 if (entry.scope.type == id_type) and
242 (getattr(entry.scope, field) == id_value)] 394 (getattr(entry.scope, field) == id_value)]
243 if matching_entries: 395 if matching_entries:
244 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) 396 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries)
245 acls.entries.entry_list.remove(matching_entries[0]) 397 acls.entries.entry_list.remove(matching_entries[0])
246 398
247 # Add a new entry to the ACLs. 399 # Add a new entry to the ACLs.
248 if permission != PERMISSION_NONE: 400 if permission != PERMISSION_NONE:
249 args = {'type': id_type, 'permission': permission} 401 args = {'type': id_type, 'permission': permission}
250 args[field] = id_value 402 args[field] = id_value
251 new_entry = acl.Entry(**args) 403 new_entry = acl.Entry(**args)
252 acls.entries.entry_list.append(new_entry) 404 acls.entries.entry_list.append(new_entry)
253 405
254 # Finally, write back the modified ACLs. 406 # Finally, write back the modified ACLs.
255 b.set_acl(acl_or_str=acls, key_name=path) 407 b.set_acl(acl_or_str=acls, key_name=path)
256 408
257 def list_bucket_contents(self, bucket, subdir=None): 409 def list_bucket_contents(self, bucket, subdir=None):
258 """Returns files in the Google Storage bucket as a (dirs, files) tuple. 410 """Returns files in the Google Storage bucket as a (dirs, files) tuple.
259 411
412 TODO(epoger): This should raise an exception if subdir does not exist in
413 Google Storage; right now, it just returns empty contents.
414
260 Args: 415 Args:
261 bucket: name of the Google Storage bucket 416 bucket: name of the Google Storage bucket
262 subdir: directory within the bucket to list, or None for root directory 417 subdir: directory within the bucket to list, or None for root directory
263 """ 418 """
264 # The GS command relies on the prefix (if any) ending with a slash. 419 # The GS command relies on the prefix (if any) ending with a slash.
265 prefix = subdir or '' 420 prefix = subdir or ''
266 if prefix and not prefix.endswith('/'): 421 if prefix and not prefix.endswith('/'):
267 prefix += '/' 422 prefix += '/'
268 prefix_length = len(prefix) if prefix else 0 423 prefix_length = len(prefix) if prefix else 0
269 424
270 conn = self._create_connection() 425 b = self._connect_to_bucket(bucket_name=bucket)
271 b = conn.get_bucket(bucket_name=bucket)
272 lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') 426 lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
273 dirs = [] 427 dirs = []
274 files = [] 428 files = []
275 for item in lister: 429 for item in lister:
276 t = type(item) 430 t = type(item)
277 if t is Key: 431 if t is Key:
278 files.append(item.key[prefix_length:]) 432 files.append(item.key[prefix_length:])
279 elif t is Prefix: 433 elif t is Prefix:
280 dirs.append(item.name[prefix_length:-1]) 434 dirs.append(item.name[prefix_length:-1])
281 return (dirs, files) 435 return (dirs, files)
282 436
437 def _connect_to_bucket(self, bucket_name):
438 """Returns a Bucket object we can use to access a particular bucket in GS.
439
440 Params:
441 bucket_name: name of the bucket (e.g., 'chromium-skia-gm')
442 """
443 try:
444 return self._create_connection().get_bucket(bucket_name=bucket_name)
445 except BotoServerError, e:
446 e.body = repr(e.body) + ' while connecting to bucket=%s' % bucket_name
447 raise
448
283 def _create_connection(self): 449 def _create_connection(self):
284 """Returns a GSConnection object we can use to access Google Storage.""" 450 """Returns a GSConnection object we can use to access Google Storage."""
285 if self._gs_access_key_id: 451 if self._gs_access_key_id:
286 return GSConnection( 452 return GSConnection(
287 gs_access_key_id=self._gs_access_key_id, 453 gs_access_key_id=self._gs_access_key_id,
288 gs_secret_access_key=self._gs_secret_access_key) 454 gs_secret_access_key=self._gs_secret_access_key)
289 else: 455 else:
290 return AnonymousGSConnection() 456 return AnonymousGSConnection()
291 457
292 def _config_file_as_dict(filepath): 458 def _config_file_as_dict(filepath):
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
342 Do you have a ~/.boto file that provides the credentials needed to read 508 Do you have a ~/.boto file that provides the credentials needed to read
343 and write gs://chromium-skia-gm ? 509 and write gs://chromium-skia-gm ?
344 """ 510 """
345 raise 511 raise
346 512
347 bucket = 'chromium-skia-gm' 513 bucket = 'chromium-skia-gm'
348 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) 514 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint)
349 subdir = 'subdir' 515 subdir = 'subdir'
350 filenames_to_upload = ['file1', 'file2'] 516 filenames_to_upload = ['file1', 'file2']
351 517
352 # Upload test files to Google Storage. 518 # Upload test files to Google Storage, checking that their fine-grained
519 # ACLs were set correctly.
520 id_type = ID_TYPE_GROUP_BY_DOMAIN
521 id_value = 'chromium.org'
522 set_permission = PERMISSION_READ
353 local_src_dir = tempfile.mkdtemp() 523 local_src_dir = tempfile.mkdtemp()
354 os.mkdir(os.path.join(local_src_dir, subdir)) 524 os.mkdir(os.path.join(local_src_dir, subdir))
355 try: 525 try:
356 for filename in filenames_to_upload: 526 for filename in filenames_to_upload:
357 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: 527 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f:
358 f.write('contents of %s\n' % filename) 528 f.write('contents of %s\n' % filename)
359 gs.upload_file(source_path=os.path.join(local_src_dir, subdir, filename), 529 dest_path = posixpath.join(remote_dir, subdir, filename)
360 dest_bucket=bucket, 530 gs.upload_file(
361 dest_path=posixpath.join(remote_dir, subdir, filename)) 531 source_path=os.path.join(local_src_dir, subdir, filename),
532 dest_bucket=bucket, dest_path=dest_path,
533 fine_grained_acl_list=[(id_type, id_value, set_permission)])
534 got_permission = gs.get_acl(bucket=bucket, path=dest_path,
535 id_type=id_type, id_value=id_value)
536 assert got_permission == set_permission, '%s == %s' % (
537 got_permission, set_permission)
362 finally: 538 finally:
363 shutil.rmtree(local_src_dir) 539 shutil.rmtree(local_src_dir)
364 540
365 # Get a list of the files we uploaded to Google Storage. 541 # Get a list of the files we uploaded to Google Storage.
366 (dirs, files) = gs.list_bucket_contents( 542 (dirs, files) = gs.list_bucket_contents(
367 bucket=bucket, subdir=remote_dir) 543 bucket=bucket, subdir=remote_dir)
368 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir) 544 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir)
369 assert files == [], '%s == []' % files 545 assert files == [], '%s == []' % files
370 (dirs, files) = gs.list_bucket_contents( 546 (dirs, files) = gs.list_bucket_contents(
371 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) 547 bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
427 gs.delete_file(bucket=bucket, 603 gs.delete_file(bucket=bucket,
428 path=posixpath.join(remote_dir, subdir, filename)) 604 path=posixpath.join(remote_dir, subdir, filename))
429 605
430 # Confirm that we deleted all the files we uploaded to Google Storage. 606 # Confirm that we deleted all the files we uploaded to Google Storage.
431 (dirs, files) = gs.list_bucket_contents( 607 (dirs, files) = gs.list_bucket_contents(
432 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) 608 bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
433 assert dirs == [], '%s == []' % dirs 609 assert dirs == [], '%s == []' % dirs
434 assert files == [], '%s == []' % files 610 assert files == [], '%s == []' % files
435 611
436 612
613 def _test_dir_upload_and_download():
614 """Test upload_dir_contents() and download_dir_contents()."""
615 try:
616 gs = GSUtils(boto_file_path=os.path.expanduser(os.path.join('~','.boto')))
617 except:
618 print """
619 Failed to instantiate GSUtils object with default .boto file path.
620 Do you have a ~/.boto file that provides the credentials needed to read
621 and write gs://chromium-skia-gm ?
622 """
623 raise
624
625 bucket = 'chromium-skia-gm'
626 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint)
627 subdir = 'subdir'
628 filenames = ['file1', 'file2']
629
630 # Create directory tree on local disk, and upload it.
631 local_src_dir = tempfile.mkdtemp()
632 os.mkdir(os.path.join(local_src_dir, subdir))
633 try:
634 for filename in filenames:
635 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f:
636 f.write('contents of %s\n' % filename)
637 gs.upload_dir_contents(source_dir=local_src_dir, dest_bucket=bucket,
638 dest_dir=remote_dir)
639 finally:
640 shutil.rmtree(local_src_dir)
641
642 # Validate the list of the files we uploaded to Google Storage.
643 (dirs, files) = gs.list_bucket_contents(
644 bucket=bucket, subdir=remote_dir)
645 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir)
646 assert files == [], '%s == []' % files
647 (dirs, files) = gs.list_bucket_contents(
648 bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
649 assert dirs == [], '%s == []' % dirs
650 assert files == filenames, '%s == %s' % (files, filenames)
651
652 # Download the directory tree we just uploaded, make sure its contents
653 # are what we expect, and then delete the tree in Google Storage.
654 local_dest_dir = tempfile.mkdtemp()
655 try:
656 gs.download_dir_contents(source_bucket=bucket, source_dir=remote_dir,
657 dest_dir=local_dest_dir)
658 for filename in filenames:
659 with open(os.path.join(local_dest_dir, subdir, filename)) as f:
660 file_contents = f.read()
661 assert file_contents == 'contents of %s\n' % filename, (
662 '%s == "contents of %s\n"' % (file_contents, filename))
663 finally:
664 shutil.rmtree(local_dest_dir)
665 for filename in filenames:
666 gs.delete_file(bucket=bucket,
667 path=posixpath.join(remote_dir, subdir, filename))
668
669
437 # TODO(epoger): How should we exercise these self-tests? 670 # TODO(epoger): How should we exercise these self-tests?
438 # See http://skbug.com/2751 671 # See http://skbug.com/2751
439 if __name__ == '__main__': 672 if __name__ == '__main__':
440 _test_public_read() 673 _test_public_read()
441 _test_authenticated_round_trip() 674 _test_authenticated_round_trip()
675 _test_dir_upload_and_download()
442 # TODO(epoger): Add _test_unauthenticated_access() to make sure we raise 676 # TODO(epoger): Add _test_unauthenticated_access() to make sure we raise
443 # an exception when we try to access without needed credentials. 677 # an exception when we try to access without needed credentials.
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698