Chromium Code Reviews

Diff: py/utils/gs_utils.py

Issue 407533002: add upload_dir_contents() and download_dir_contents() to gs_utils.py (Closed) Base URL: https://skia.googlesource.com/common.git@master
Patch Set: add fine_grained_acl_list (created 6 years, 5 months ago)
#!/usr/bin/python

# pylint: disable=C0301
"""
Copyright 2014 Google Inc.

Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

Utilities for accessing Google Cloud Storage, using the boto library (wrapper
(...skipping 18 matching lines...)
# Imports from third-party code
TRUNK_DIRECTORY = os.path.abspath(os.path.join(
    os.path.dirname(__file__), os.pardir, os.pardir))
for import_subdir in ['boto']:
  import_dirpath = os.path.join(
      TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir)
  if import_dirpath not in sys.path:
    # We need to insert at the beginning of the path, to make sure that our
    # imported versions are favored over others that might be in the path.
    sys.path.insert(0, import_dirpath)
from boto.exception import BotoServerError
from boto.gs import acl
from boto.gs.bucket import Bucket
from boto.gs.connection import GSConnection
from boto.gs.key import Key
from boto.s3.bucketlistresultset import BucketListResultSet
from boto.s3.connection import SubdomainCallingFormat
from boto.s3.prefix import Prefix
# "Canned" ACLs that provide a "base coat" of permissions for each file in
# Google Storage. See CannedACLStrings in
# https://github.com/boto/boto/blob/develop/boto/gs/acl.py
CANNED_ACL_AUTHENTICATED_READ = 'authenticated-read'
CANNED_ACL_BUCKET_OWNER_FULL_CONTROL = 'bucket-owner-full-control'
CANNED_ACL_BUCKET_OWNER_READ = 'bucket-owner-read'
CANNED_ACL_PRIVATE = 'private'
CANNED_ACL_PROJECT_PRIVATE = 'project-private'
CANNED_ACL_PUBLIC_READ = 'public-read'
CANNED_ACL_PUBLIC_READ_WRITE = 'public-read-write'

rmistry 2014/07/18 16:55:07: Nit: Change the above to only have one space before…
epoger 2014/07/18 17:36:19: Aligned 'em all.

# "Fine-grained" permissions that may be set per user/group on each file in
# Google Storage. See SupportedPermissions in
# https://github.com/boto/boto/blob/develop/boto/gs/acl.py
PERMISSION_NONE = None
PERMISSION_OWNER = 'FULL_CONTROL'
PERMISSION_READ = 'READ'
PERMISSION_WRITE = 'WRITE'

# Types of identifiers we can use to set "fine-grained" ACLs.
ID_TYPE_GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN
ID_TYPE_GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL
ID_TYPE_GROUP_BY_ID = acl.GROUP_BY_ID
ID_TYPE_USER_BY_EMAIL = acl.USER_BY_EMAIL
ID_TYPE_USER_BY_ID = acl.USER_BY_ID

# Which field we get/set in ACL entries, depending on ID_TYPE.
FIELD_BY_ID_TYPE = {
    ID_TYPE_GROUP_BY_DOMAIN: 'domain',
    ID_TYPE_GROUP_BY_EMAIL: 'email_address',
(...skipping 47 matching lines...)
  def delete_file(self, bucket, path):
    """Delete a single file within a GS bucket.

    TODO(epoger): what if bucket or path does not exist? Should probably raise
    an exception. Implement, and add a test to exercise this.

    Params:
      bucket: GS bucket to delete a file from
      path: full path (Posix-style) of the file within the bucket to delete
    """
    b = self._connect_to_bucket(bucket_name=bucket)
    item = Key(b)
    item.key = path
    try:
      item.delete()
    except BotoServerError, e:

epoger 2014/07/18 15:48:19: While I was in here, improved various error messages.

      e.body = (repr(e.body) +
                ' while deleting bucket=%s, path=%s' % (bucket, path))
      raise

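For callers, the effect of the error-message wrapping above is that the original BotoServerError still propagates, but with the request context appended to its body. A minimal sketch, assuming a GSUtils instance gs (bucket and path are placeholders):

try:
  gs.delete_file(bucket='my-bucket', path='path/to/file')
except BotoServerError, e:
  # e.body now ends with '... while deleting bucket=my-bucket, path=path/to/file'
  print e.body
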
  def upload_file(self, source_path, dest_bucket, dest_path,
                  canned_acl=CANNED_ACL_PRIVATE, fine_grained_acl_list=None):

epoger 2014/07/18 15:48:19: I didn't add the http_header_lines param we had in…

    """Upload contents of a local file to Google Storage.

    TODO(epoger): Add the only_if_modified param provided by upload_file() in
    https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py ,
    so we can replace that function with this one.

    params:
      source_path: full path (local-OS-style) on local disk to read from
      dest_bucket: GCS bucket to copy the file to
      dest_path: full path (Posix-style) within that bucket
      canned_acl: which predefined ACL to apply to the file on Google Storage;
          must be one of the CANNED_ACL_* constants defined above.
          TODO(epoger): add unittests for this param, although it seems to work
          in my manual testing

rmistry 2014/07/18 16:55:07: This should be allowed to be None because if all you…
epoger 2014/07/18 17:36:19: I agree, sort of. Searching for "predefined" in h…

      fine_grained_acl_list: list of (id_type, id_value, permission) tuples
          to apply to the uploaded file, or None if canned_acl is sufficient
    """
    b = self._connect_to_bucket(bucket_name=dest_bucket)
    item = Key(b)
    item.key = dest_path
    try:
      item.set_contents_from_filename(filename=source_path, policy=canned_acl)
    except BotoServerError, e:
      e.body = (repr(e.body) +
                ' while uploading source_path=%s to bucket=%s, path=%s' % (
                    source_path, dest_bucket, item.key))
      raise
    # TODO(epoger): This may be inefficient, because it calls
    # _connect_to_bucket() again. Depending on how expensive that
    # call is, we may want to optimize this.
    for (id_type, id_value, permission) in fine_grained_acl_list or []:
      self.set_acl(
          bucket=dest_bucket, path=item.key,
          id_type=id_type, id_value=id_value, permission=permission)

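A usage sketch for the two new parameters, modeled on the self-tests at the bottom of this file; the bucket name and paths are placeholders:

gs = GSUtils(boto_file_path=os.path.expanduser(os.path.join('~', '.boto')))
gs.upload_file(
    source_path='/tmp/report.html',
    dest_bucket='my-bucket', dest_path='reports/report.html',
    canned_acl=CANNED_ACL_PUBLIC_READ,
    fine_grained_acl_list=[
        (ID_TYPE_GROUP_BY_DOMAIN, 'chromium.org', PERMISSION_READ)])
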
  def upload_dir_contents(self, source_dir, dest_bucket, dest_dir,
                          canned_acl=CANNED_ACL_PRIVATE,
                          fine_grained_acl_list=None):
    """Recursively upload contents of a local directory to Google Storage.

    params:
      source_dir: full path (local-OS-style) on local disk of directory to copy
          contents of
      dest_bucket: GCS bucket to copy the files into
      dest_dir: full path (Posix-style) within that bucket; write the files into
          this directory
      canned_acl: which predefined ACL to apply to the files on Google Storage;
          must be one of the CANNED_ACL_* constants defined above.
          TODO(epoger): add unittests for this param, although it seems to work
          in my manual testing
      fine_grained_acl_list: list of (id_type, id_value, permission) tuples
          to apply to every file uploaded, or None if canned_acl is sufficient
          TODO(epoger): add unittests for this param, although it seems to work
          in my manual testing

    The copy operates as a "merge with overwrite": any files in source_dir will
    be "overlaid" on top of the existing content in dest_dir. Existing files
    with the same names will be overwritten.

    TODO(epoger): Upload multiple files simultaneously to reduce latency.

epoger 2014/07/18 15:48:18: All these TODOs copied in from https://skia.google…

    TODO(epoger): Add a "noclobber" mode that will not upload any files that
    would overwrite existing files in Google Storage.

    TODO(epoger): Consider adding a do_compress parameter that would compress
    the file using gzip before upload, and add a "Content-Encoding:gzip" header
    so that HTTP downloads of the file would be unzipped automatically. See
    https://developers.google.com/storage/docs/gsutil/addlhelp/WorkingWithObjectMetadata#content-encoding
    """
    b = self._connect_to_bucket(bucket_name=dest_bucket)
    for filename in sorted(os.listdir(source_dir)):
      local_path = os.path.join(source_dir, filename)
      if os.path.isdir(local_path):
        self.upload_dir_contents(  # recurse
            source_dir=local_path, dest_bucket=dest_bucket,
            dest_dir=posixpath.join(dest_dir, filename),
            canned_acl=canned_acl,
            fine_grained_acl_list=fine_grained_acl_list)
      else:
        item = Key(b)
        item.key = posixpath.join(dest_dir, filename)
        try:
          item.set_contents_from_filename(
              filename=local_path, policy=canned_acl)
        except BotoServerError, e:
          e.body = (repr(e.body) +
                    ' while uploading local_path=%s to bucket=%s, path=%s' % (
                        local_path, dest_bucket, item.key))
          raise
        # TODO(epoger): This may be inefficient, because it calls
        # _connect_to_bucket() for every file. Depending on how expensive that
        # call is, we may want to optimize this.
        for (id_type, id_value, permission) in fine_grained_acl_list or []:
          self.set_acl(
              bucket=dest_bucket, path=item.key,
              id_type=id_type, id_value=id_value, permission=permission)

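A sketch of the new method, mirroring _test_dir_upload_and_download() below; the local directory, bucket, and email are placeholders. Files under /tmp/mydir land under gs://my-bucket/backups/mydir, merged over any existing contents:

gs.upload_dir_contents(
    source_dir='/tmp/mydir', dest_bucket='my-bucket', dest_dir='backups/mydir',
    canned_acl=CANNED_ACL_PRIVATE,
    fine_grained_acl_list=[
        (ID_TYPE_USER_BY_EMAIL, 'someone@example.com', PERMISSION_READ)])
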
  def download_file(self, source_bucket, source_path, dest_path,
                    create_subdirs_if_needed=False):
    """Downloads a single file from Google Cloud Storage to local disk.

    Args:
      source_bucket: GCS bucket to download the file from
      source_path: full path (Posix-style) within that bucket
      dest_path: full path (local-OS-style) on local disk to copy the file to
      create_subdirs_if_needed: boolean; whether to create subdirectories as
          needed to create dest_path
    """
    b = self._connect_to_bucket(bucket_name=source_bucket)
    item = Key(b)
    item.key = source_path
    if create_subdirs_if_needed:
      _makedirs_if_needed(os.path.dirname(dest_path))
    with open(dest_path, 'w') as f:
      try:
        item.get_contents_to_file(fp=f)
      except BotoServerError, e:
        e.body = (repr(e.body) +
                  ' while downloading bucket=%s, path=%s to local_path=%s' % (
                      source_bucket, source_path, dest_path))
        raise

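For symmetry, a download sketch under the same placeholder names; create_subdirs_if_needed=True creates the local destination directory if it does not exist yet:

gs.download_file(
    source_bucket='my-bucket', source_path='reports/report.html',
    dest_path='/tmp/downloads/report.html', create_subdirs_if_needed=True)
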
  def download_dir_contents(self, source_bucket, source_dir, dest_dir):
    """Recursively download contents of a Google Storage directory to local disk.

    params:
      source_bucket: GCS bucket to copy the files from
      source_dir: full path (Posix-style) within that bucket; read the files
          from this directory
      dest_dir: full path (local-OS-style) on local disk of directory to copy
          the files into

    The copy operates as a "merge with overwrite": any files in source_dir will
    be "overlaid" on top of the existing content in dest_dir. Existing files
    with the same names will be overwritten.

    TODO(epoger): Download multiple files simultaneously to reduce latency.
    """
    _makedirs_if_needed(dest_dir)
    b = self._connect_to_bucket(bucket_name=source_bucket)
    (dirs, files) = self.list_bucket_contents(
        bucket=source_bucket, subdir=source_dir)

    for filename in files:
      item = Key(b)
      item.key = posixpath.join(source_dir, filename)
      dest_path = os.path.join(dest_dir, filename)
      with open(dest_path, 'w') as f:
        try:
          item.get_contents_to_file(fp=f)
        except BotoServerError, e:
          e.body = (repr(e.body) +
                    ' while downloading bucket=%s, path=%s to local_path=%s' % (
                        source_bucket, item.key, dest_path))
          raise

    for dirname in dirs:
      self.download_dir_contents(  # recurse
          source_bucket=source_bucket,
          source_dir=posixpath.join(source_dir, dirname),
          dest_dir=os.path.join(dest_dir, dirname))

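And the directory-level counterpart, as exercised in _test_dir_upload_and_download() below (all names are placeholders); the remote tree is merged over whatever already exists locally:

gs.download_dir_contents(
    source_bucket='my-bucket', source_dir='backups/mydir',
    dest_dir='/tmp/restored/mydir')
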
  def get_acl(self, bucket, path, id_type, id_value):
    """Retrieve partial access permissions on a single file in Google Storage.

    Various users who match this id_type/id_value pair may have access rights
    other than that returned by this call, if they have been granted those
    rights based on *other* id_types (e.g., perhaps they have group access
    rights, beyond their individual access rights).

    TODO(epoger): What if the remote file does not exist? This should probably
    raise an exception in that case.

    Params:
      bucket: GS bucket
      path: full path (Posix-style) to the file within that bucket
      id_type: must be one of the ID_TYPE_* constants defined above
      id_value: get permissions for users whose id_type field contains this
          value

    Returns: the PERMISSION_* constant which has been set for users matching
        this id_type/id_value, on this file; or PERMISSION_NONE if no such
        permissions have been set.
    """
    field = FIELD_BY_ID_TYPE[id_type]
    b = self._connect_to_bucket(bucket_name=bucket)
    acls = b.get_acl(key_name=path)
    matching_entries = [entry for entry in acls.entries.entry_list
                        if (entry.scope.type == id_type) and
                        (getattr(entry.scope, field) == id_value)]
    if matching_entries:
      assert len(matching_entries) == 1, '%d == 1' % len(matching_entries)
      return matching_entries[0].permission
    else:
      return PERMISSION_NONE

  def set_acl(self, bucket, path, id_type, id_value, permission):
    """Set partial access permissions on a single file in Google Storage.

    Note that a single set_acl() call will not guarantee what access rights any
    given user will have on a given file, because permissions are additive.
    (E.g., if you set READ permission for a group, but a member of that group
    already has WRITE permission, that member will still have WRITE permission.)
    TODO(epoger): Do we know that for sure? I *think* that's how it works...

    If there is already a permission set on this file for this id_type/id_value
    combination, this call will overwrite it.

    TODO(epoger): What if the remote file does not exist? This should probably
    raise an exception in that case.

    Params:
      bucket: GS bucket
      path: full path (Posix-style) to the file within that bucket
      id_type: must be one of the ID_TYPE_* constants defined above
      id_value: add permission for users whose id_type field contains this value
      permission: permission to add for users matching id_type/id_value;
          must be one of the PERMISSION_* constants defined above.
          If PERMISSION_NONE, then any permissions that have been granted to
          this particular id_type/id_value will be removed... but, given that
          permissions are additive, specific users may still have access rights
          based on permissions given to *other* id_type/id_value pairs.

    Example Code:
      bucket = 'gs://bucket-name'
      path = 'path/to/file'
      id_type = ID_TYPE_USER_BY_EMAIL
      id_value = 'epoger@google.com'
      set_acl(bucket, path, id_type, id_value, PERMISSION_READ)
      assert PERMISSION_READ == get_acl(bucket, path, id_type, id_value)
      set_acl(bucket, path, id_type, id_value, PERMISSION_WRITE)
      assert PERMISSION_WRITE == get_acl(bucket, path, id_type, id_value)
    """
    field = FIELD_BY_ID_TYPE[id_type]
    b = self._connect_to_bucket(bucket_name=bucket)
    acls = b.get_acl(key_name=path)

    # Remove any existing entries that refer to the same id_type/id_value,
    # because the API will fail if we try to set more than one.
    matching_entries = [entry for entry in acls.entries.entry_list
                        if (entry.scope.type == id_type) and
                        (getattr(entry.scope, field) == id_value)]
    if matching_entries:
      assert len(matching_entries) == 1, '%d == 1' % len(matching_entries)
      acls.entries.entry_list.remove(matching_entries[0])

    # Add a new entry to the ACLs.
    if permission != PERMISSION_NONE:
      args = {'type': id_type, 'permission': permission}
      args[field] = id_value
      new_entry = acl.Entry(**args)
      acls.entries.entry_list.append(new_entry)

    # Finally, write back the modified ACLs.
    b.set_acl(acl_or_str=acls, key_name=path)

  def list_bucket_contents(self, bucket, subdir=None):
    """Returns files in the Google Storage bucket as a (dirs, files) tuple.

    TODO(epoger): This should raise an exception if subdir does not exist in
    Google Storage; right now, it just returns empty contents.

    Args:
      bucket: name of the Google Storage bucket
      subdir: directory within the bucket to list, or None for root directory
    """
    # The GS command relies on the prefix (if any) ending with a slash.
    prefix = subdir or ''
    if prefix and not prefix.endswith('/'):
      prefix += '/'
    prefix_length = len(prefix) if prefix else 0

    b = self._connect_to_bucket(bucket_name=bucket)
    lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
    dirs = []
    files = []
    for item in lister:
      t = type(item)
      if t is Key:
        files.append(item.key[prefix_length:])
      elif t is Prefix:
        dirs.append(item.name[prefix_length:-1])
    return (dirs, files)

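A sketch of how the (dirs, files) split behaves, assuming the bucket layout created by the self-tests below (one 'subdir' directory holding 'file1' and 'file2'; bucket name and directory number are placeholders):

(dirs, files) = gs.list_bucket_contents(bucket='my-bucket',
                                        subdir='gs_utils_test/12345')
# dirs == ['subdir'], files == []
(dirs, files) = gs.list_bucket_contents(bucket='my-bucket',
                                        subdir='gs_utils_test/12345/subdir')
# dirs == [], files == ['file1', 'file2']
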
  def _connect_to_bucket(self, bucket_name):
    """Returns a Bucket object we can use to access a particular bucket in GS.

    Params:
      bucket_name: name of the bucket (e.g., 'chromium-skia-gm')
    """
    try:
      return self._create_connection().get_bucket(bucket_name=bucket_name)
    except BotoServerError, e:
      e.body = repr(e.body) + ' while connecting to bucket=%s' % bucket_name
      raise

  def _create_connection(self):
    """Returns a GSConnection object we can use to access Google Storage."""
    if self._gs_access_key_id:
      return GSConnection(
          gs_access_key_id=self._gs_access_key_id,
          gs_secret_access_key=self._gs_secret_access_key)
    else:
      return AnonymousGSConnection()


def _config_file_as_dict(filepath):
(...skipping 49 matching lines...)
Do you have a ~/.boto file that provides the credentials needed to read
and write gs://chromium-skia-gm ?
"""
    raise

  bucket = 'chromium-skia-gm'
  remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint)
  subdir = 'subdir'
  filenames_to_upload = ['file1', 'file2']

  # Upload test files to Google Storage, checking that their fine-grained
  # ACLs were set correctly.
  id_type = ID_TYPE_GROUP_BY_DOMAIN
  id_value = 'chromium.org'
  set_permission = PERMISSION_READ
  local_src_dir = tempfile.mkdtemp()
  os.mkdir(os.path.join(local_src_dir, subdir))
  try:
    for filename in filenames_to_upload:
      with open(os.path.join(local_src_dir, subdir, filename), 'w') as f:
        f.write('contents of %s\n' % filename)
      dest_path = posixpath.join(remote_dir, subdir, filename)
      gs.upload_file(
          source_path=os.path.join(local_src_dir, subdir, filename),
          dest_bucket=bucket, dest_path=dest_path,
          fine_grained_acl_list=[(id_type, id_value, set_permission)])
      got_permission = gs.get_acl(bucket=bucket, path=dest_path,
                                  id_type=id_type, id_value=id_value)
      assert got_permission == set_permission, '%s == %s' % (
          got_permission, set_permission)
  finally:
    shutil.rmtree(local_src_dir)

  # Get a list of the files we uploaded to Google Storage.
  (dirs, files) = gs.list_bucket_contents(
      bucket=bucket, subdir=remote_dir)
  assert dirs == [subdir], '%s == [%s]' % (dirs, subdir)
  assert files == [], '%s == []' % files
  (dirs, files) = gs.list_bucket_contents(
      bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
(...skipping 55 matching lines...)
    gs.delete_file(bucket=bucket,
                   path=posixpath.join(remote_dir, subdir, filename))

  # Confirm that we deleted all the files we uploaded to Google Storage.
  (dirs, files) = gs.list_bucket_contents(
      bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
  assert dirs == [], '%s == []' % dirs
  assert files == [], '%s == []' % files

def _test_dir_upload_and_download():
  """Test upload_dir_contents() and download_dir_contents()."""
  try:
    gs = GSUtils(boto_file_path=os.path.expanduser(os.path.join('~', '.boto')))
  except:
    print """
Failed to instantiate GSUtils object with default .boto file path.
Do you have a ~/.boto file that provides the credentials needed to read
and write gs://chromium-skia-gm ?
"""
    raise

  bucket = 'chromium-skia-gm'
  remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint)
  subdir = 'subdir'
  filenames = ['file1', 'file2']

  # Create directory tree on local disk, and upload it.
  local_src_dir = tempfile.mkdtemp()
  os.mkdir(os.path.join(local_src_dir, subdir))
  try:
    for filename in filenames:
      with open(os.path.join(local_src_dir, subdir, filename), 'w') as f:
        f.write('contents of %s\n' % filename)
    gs.upload_dir_contents(source_dir=local_src_dir, dest_bucket=bucket,
                           dest_dir=remote_dir)
  finally:
    shutil.rmtree(local_src_dir)

  # Validate the list of the files we uploaded to Google Storage.
  (dirs, files) = gs.list_bucket_contents(
      bucket=bucket, subdir=remote_dir)
  assert dirs == [subdir], '%s == [%s]' % (dirs, subdir)
  assert files == [], '%s == []' % files
  (dirs, files) = gs.list_bucket_contents(
      bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
  assert dirs == [], '%s == []' % dirs
  assert files == filenames, '%s == %s' % (files, filenames)

  # Download the directory tree we just uploaded, make sure its contents
  # are what we expect, and then delete the tree in Google Storage.
  local_dest_dir = tempfile.mkdtemp()
  try:
    gs.download_dir_contents(source_bucket=bucket, source_dir=remote_dir,
                             dest_dir=local_dest_dir)
    for filename in filenames:
      with open(os.path.join(local_dest_dir, subdir, filename)) as f:
        file_contents = f.read()
      assert file_contents == 'contents of %s\n' % filename, (
          '%s == "contents of %s\n"' % (file_contents, filename))
  finally:
    shutil.rmtree(local_dest_dir)
    for filename in filenames:
      gs.delete_file(bucket=bucket,
                     path=posixpath.join(remote_dir, subdir, filename))

# TODO(epoger): How should we exercise these self-tests?
# See http://skbug.com/2751
if __name__ == '__main__':
  _test_public_read()
  _test_authenticated_round_trip()
  _test_dir_upload_and_download()
  # TODO(epoger): Add _test_unauthenticated_access() to make sure we raise
  # an exception when we try to access without needed credentials.