Chromium Code Reviews

Side by Side Diff: py/utils/gs_utils.py

Issue 418503005: GSUtils: allow uploads to happen ALWAYS, IF_NEW, or IF_MODIFIED (Closed)
Base URL: https://skia.googlesource.com/common.git@master
Patch Set: refactor upload_dir_contents() to call upload_file() | Created 6 years, 5 months ago
1 #!/usr/bin/python 1 #!/usr/bin/python
2 2
3 # pylint: disable=C0301 3 # pylint: disable=C0301
4 """ 4 """
5 Copyright 2014 Google Inc. 5 Copyright 2014 Google Inc.
6 6
7 Use of this source code is governed by a BSD-style license that can be 7 Use of this source code is governed by a BSD-style license that can be
8 found in the LICENSE file. 8 found in the LICENSE file.
9 9
10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper
(...skipping 83 matching lines...)
94 PUBLIC_READ_WRITE = 'public-read-write' 94 PUBLIC_READ_WRITE = 'public-read-write'
95 95
96 class IdType: 96 class IdType:
97 """Types of identifiers we can use to set "fine-grained" ACLs.""" 97 """Types of identifiers we can use to set "fine-grained" ACLs."""
98 GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN 98 GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN
99 GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL 99 GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL
100 GROUP_BY_ID = acl.GROUP_BY_ID 100 GROUP_BY_ID = acl.GROUP_BY_ID
101 USER_BY_EMAIL = acl.USER_BY_EMAIL 101 USER_BY_EMAIL = acl.USER_BY_EMAIL
102 USER_BY_ID = acl.USER_BY_ID 102 USER_BY_ID = acl.USER_BY_ID
103 103
104 class UploadIf:
105 """Cases in which we will upload a file.
106
107 Beware of performance tradeoffs. E.g., if the file is small, the extra
108 round trip to check for file existence and/or checksum may take longer than
109 just uploading the file."""
110 ALWAYS = 1 # always upload the file
111 IF_NEW = 2 # if there is an existing file with the same name,
112 # leave it alone
113 IF_MODIFIED = 3 # if there is an existing file with the same name and
114 # contents, leave it alone
104 115
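For illustration, a minimal usage sketch of choosing among these modes, given the round-trip tradeoff noted in the docstring above. The .boto path, bucket name, and object paths are hypothetical, and the import path is assumed from this file's location:

import os

from gs_utils import GSUtils  # assumed import path for this module

gs = GSUtils(boto_file_path=os.path.expanduser('~/.boto'))  # hypothetical credentials file
src = 'render.png'
# For tiny files, the existence/checksum round trip can cost more than the
# upload itself, so send those unconditionally; otherwise skip unmodified files.
mode = (GSUtils.UploadIf.ALWAYS if os.path.getsize(src) < 4096
        else GSUtils.UploadIf.IF_MODIFIED)
gs.upload_file(source_path=src,
               dest_bucket='example-bucket',      # hypothetical bucket
               dest_path='results/render.png',
               upload_if=mode)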
105 def __init__(self, boto_file_path=None): 116 def __init__(self, boto_file_path=None):
106 """Constructor. 117 """Constructor.
107 118
108 Params: 119 Params:
109 boto_file_path: full path (local-OS-style) on local disk where .boto 120 boto_file_path: full path (local-OS-style) on local disk where .boto
110 credentials file can be found. If None, then the GSUtils object 121 credentials file can be found. If None, then the GSUtils object
111 created will be able to access only public files in Google Storage. 122 created will be able to access only public files in Google Storage.
112 123
113 Raises an exception if no file is found at boto_file_path, or if the file 124 Raises an exception if no file is found at boto_file_path, or if the file
(...skipping 18 matching lines...)
132 def delete_file(self, bucket, path): 143 def delete_file(self, bucket, path):
133 """Delete a single file within a GS bucket. 144 """Delete a single file within a GS bucket.
134 145
135 TODO(epoger): what if bucket or path does not exist? Should probably raise 146 TODO(epoger): what if bucket or path does not exist? Should probably raise
136 an exception. Implement, and add a test to exercise this. 147 an exception. Implement, and add a test to exercise this.
137 148
138 Params: 149 Params:
139 bucket: GS bucket to delete a file from 150 bucket: GS bucket to delete a file from
140 path: full path (Posix-style) of the file within the bucket to delete 151 path: full path (Posix-style) of the file within the bucket to delete
141 """ 152 """
142 b = self._connect_to_bucket(bucket_name=bucket) 153 b = self._connect_to_bucket(bucket=bucket)
143 key = Key(b) 154 key = Key(b)
144 key.name = path 155 key.name = path
145 try: 156 try:
146 key.delete() 157 key.delete()
147 except BotoServerError, e: 158 except BotoServerError, e:
148 e.body = (repr(e.body) + 159 e.body = (repr(e.body) +
149 ' while deleting bucket=%s, path=%s' % (bucket, path)) 160 ' while deleting bucket=%s, path=%s' % (bucket, path))
150 raise 161 raise
151 162
152 def get_last_modified_time(self, bucket, path): 163 def get_last_modified_time(self, bucket, path):
153 """Gets the timestamp of when this file was last modified. 164 """Gets the timestamp of when this file was last modified.
154 165
155 Params: 166 Params:
156 bucket: GS bucket in which to look for the file 167 bucket: GS bucket in which to look for the file
157 path: full path (Posix-style) of the file within the bucket to check 168 path: full path (Posix-style) of the file within the bucket to check
158 169
159 Returns the last modified time, as a freeform string. If the file was not 170 Returns the last modified time, as a freeform string. If the file was not
160 found, returns None. 171 found, returns None.
161 """ 172 """
162 b = self._connect_to_bucket(bucket_name=bucket) 173 b = self._connect_to_bucket(bucket=bucket)
163 try: 174 try:
164 key = b.get_key(key_name=path) 175 key = b.get_key(key_name=path)
165 if not key: 176 if not key:
166 return None 177 return None
167 return key.last_modified 178 return key.last_modified
168 except BotoServerError, e: 179 except BotoServerError, e:
169 e.body = (repr(e.body) + 180 e.body = (repr(e.body) +
170 ' while getting attributes of bucket=%s, path=%s' % ( 181 ' while getting attributes of bucket=%s, path=%s' % (
171 bucket, path)) 182 bucket, path))
172 raise 183 raise
173 184
174 def upload_file(self, source_path, dest_bucket, dest_path, 185 def upload_file(self, source_path, dest_bucket, dest_path,
175 only_if_modified=False, predefined_acl=None, 186 upload_if=UploadIf.ALWAYS,
187 predefined_acl=None,
176 fine_grained_acl_list=None): 188 fine_grained_acl_list=None):
177 """Upload contents of a local file to Google Storage. 189 """Upload contents of a local file to Google Storage.
178 190
179 params: 191 params:
180 source_path: full path (local-OS-style) on local disk to read from 192 source_path: full path (local-OS-style) on local disk to read from
181 dest_bucket: GCS bucket to copy the file to 193 dest_bucket: GS bucket to copy the file to
182 dest_path: full path (Posix-style) within that bucket 194 dest_path: full path (Posix-style) within that bucket
183 only_if_modified: if True, only upload the file if it would actually 195 upload_if: one of the UploadIf values, describing in which cases we should
184 change the content on Google Storage (uploads the file if dest_path 196 upload the file
185 does not exist, or if it exists but has different contents than
186 source_path). Note that this may take longer than just uploading the
187 file without checking first, due to extra round-trips!
188 predefined_acl: which predefined ACL to apply to the file on Google 197 predefined_acl: which predefined ACL to apply to the file on Google
189 Storage; must be one of the PredefinedACL values defined above. 198 Storage; must be one of the PredefinedACL values defined above.
190 If None, inherits dest_bucket's default object ACL. 199 If None, inherits dest_bucket's default object ACL.
191 TODO(epoger): add unittests for this param, although it seems to work
192 in my manual testing
193 fine_grained_acl_list: list of (id_type, id_value, permission) tuples 200 fine_grained_acl_list: list of (id_type, id_value, permission) tuples
194 to apply to the uploaded file (on top of the predefined_acl), 201 to apply to the uploaded file (on top of the predefined_acl),
195 or None if predefined_acl is sufficient 202 or None if predefined_acl is sufficient
203
204 TODO(epoger): Consider adding a do_compress parameter that would compress
205 the file using gzip before upload, and add a "Content-Encoding:gzip" header
206 so that HTTP downloads of the file would be unzipped automatically.
207 See https://developers.google.com/storage/docs/gsutil/addlhelp/
208 WorkingWithObjectMetadata#content-encoding
196 """ 209 """
197 b = self._connect_to_bucket(bucket_name=dest_bucket) 210 b = self._connect_to_bucket(bucket=dest_bucket)
198 211
199 if only_if_modified: 212 if upload_if == self.UploadIf.IF_NEW:
213 old_key = b.get_key(key_name=dest_path)
214 if old_key:
215 print 'Skipping upload of existing file gs://%s/%s' % (
216 dest_bucket, dest_path)
217 return
218 elif upload_if == self.UploadIf.IF_MODIFIED:
200 old_key = b.get_key(key_name=dest_path) 219 old_key = b.get_key(key_name=dest_path)
201 if old_key: 220 if old_key:
202 local_md5 = '"%s"' % _get_local_md5(path=source_path) 221 local_md5 = '"%s"' % _get_local_md5(path=source_path)
203 if local_md5 == old_key.etag: 222 if local_md5 == old_key.etag:
204 print 'Skipping upload of unmodified file %s : %s' % ( 223 print 'Skipping upload of unmodified file gs://%s/%s : %s' % (
205 source_path, local_md5) 224 dest_bucket, dest_path, local_md5)
206 return 225 return
207 226
208 key = Key(b) 227 key = Key(b)
209 key.name = dest_path 228 key.name = dest_path
210 try: 229 try:
211 key.set_contents_from_filename(filename=source_path, 230 key.set_contents_from_filename(filename=source_path,
212 policy=predefined_acl) 231 policy=predefined_acl)
213 except BotoServerError, e: 232 except BotoServerError, e:
214 e.body = (repr(e.body) + 233 e.body = (repr(e.body) +
215 ' while uploading source_path=%s to bucket=%s, path=%s' % ( 234 ' while uploading source_path=%s to bucket=%s, path=%s' % (
216 source_path, dest_bucket, key.name)) 235 source_path, dest_bucket, key.name))
217 raise 236 raise
218 # TODO(epoger): This may be inefficient, because it calls
219 # _connect_to_bucket() again. Depending on how expensive that
220 # call is, we may want to optimize this.
221 for (id_type, id_value, permission) in fine_grained_acl_list or []: 237 for (id_type, id_value, permission) in fine_grained_acl_list or []:
222 self.set_acl( 238 self.set_acl(
223 bucket=dest_bucket, path=key.name, 239 bucket=b, path=key.name,
224 id_type=id_type, id_value=id_value, permission=permission) 240 id_type=id_type, id_value=id_value, permission=permission)
225 241
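For context, a hedged sketch of combining the ACL parameters described in the docstring above. It assumes the nested PredefinedACL class exposes a PUBLIC_READ value (only PUBLIC_READ_WRITE is visible in this diff), uses the IdType and Permission values defined earlier in the file, and the bucket, paths, and domain are made up:

gs.upload_file(
    source_path='/tmp/report.html',                    # illustrative local path
    dest_bucket='example-bucket',                      # hypothetical bucket
    dest_path='reports/report.html',
    upload_if=GSUtils.UploadIf.IF_NEW,
    predefined_acl=GSUtils.PredefinedACL.PUBLIC_READ,  # assumed value name
    fine_grained_acl_list=[
        (GSUtils.IdType.GROUP_BY_DOMAIN, 'example.com',
         GSUtils.Permission.READ),
    ])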
226 def upload_dir_contents(self, source_dir, dest_bucket, dest_dir, 242 def upload_dir_contents(self, source_dir, dest_bucket, dest_dir, **kwargs):
227 predefined_acl=None, fine_grained_acl_list=None):
228 """Recursively upload contents of a local directory to Google Storage. 243 """Recursively upload contents of a local directory to Google Storage.
229 244
230 params: 245 params:
231 source_dir: full path (local-OS-style) on local disk of directory to copy 246 source_dir: full path (local-OS-style) on local disk of directory to copy
232 contents of 247 contents of
233 dest_bucket: GCS bucket to copy the files into 248 dest_bucket: GS bucket to copy the files into
234 dest_dir: full path (Posix-style) within that bucket; write the files into 249 dest_dir: full path (Posix-style) within that bucket; write the files into
235 this directory. If None, write into the root directory of the bucket. 250 this directory. If None, write into the root directory of the bucket.
236 predefined_acl: which predefined ACL to apply to the files on Google 251 kwargs: any additional keyword arguments "inherited" from upload_file()
borenet 2014/07/24 14:25:18 I'd say you don't need to document kwargs.
epoger 2014/07/24 14:49:49 From live discussion: this is the best way we coul
237 Storage; must be one of the PredefinedACL values defined above.
238 If None, inherits dest_bucket's default object ACL.
239 TODO(epoger): add unittests for this param, although it seems to work
240 in my manual testing
241 fine_grained_acl_list: list of (id_type, id_value, permission) tuples
242 to apply to every file uploaded (on top of the predefined_acl),
243 or None if predefined_acl is sufficient
244 252
245 The copy operates as a "merge with overwrite": any files in source_dir will 253 The copy operates as a merge: any files in source_dir will be "overlaid" on
246 be "overlaid" on top of the existing content in dest_dir. Existing files 254 top of the existing content in dest_dir. Existing files with the same names
247 with the same names will be overwritten. 255 may or may not be overwritten, depending on the value of the upload_if kwarg
256 inherited from upload_file().
248 257
249 TODO(epoger): Upload multiple files simultaneously to reduce latency. 258 TODO(epoger): Upload multiple files simultaneously to reduce latency.
250
251 TODO(epoger): Add a "noclobber" mode that will not upload any files would
252 overwrite existing files in Google Storage.
253
254 TODO(epoger): Consider adding a do_compress parameter that would compress
255 the file using gzip before upload, and add a "Content-Encoding:gzip" header
256 so that HTTP downloads of the file would be unzipped automatically.
257 See https://developers.google.com/storage/docs/gsutil/addlhelp/
258 WorkingWithObjectMetadata#content-encoding
259 """ 259 """
260 b = self._connect_to_bucket(bucket_name=dest_bucket) 260 b = self._connect_to_bucket(bucket=dest_bucket)
261 for filename in sorted(os.listdir(source_dir)): 261 for filename in sorted(os.listdir(source_dir)):
262 local_path = os.path.join(source_dir, filename) 262 local_path = os.path.join(source_dir, filename)
263 if dest_dir: 263 if dest_dir:
264 remote_path = posixpath.join(dest_dir, filename) 264 remote_path = posixpath.join(dest_dir, filename)
265 else: 265 else:
266 remote_path = filename 266 remote_path = filename
267 267
268 if os.path.isdir(local_path): 268 if os.path.isdir(local_path):
269 self.upload_dir_contents( # recurse 269 self.upload_dir_contents( # recurse
270 source_dir=local_path, dest_bucket=dest_bucket, 270 source_dir=local_path, dest_bucket=b, dest_dir=remote_path,
271 dest_dir=remote_path, 271 **kwargs)
272 predefined_acl=predefined_acl,
273 fine_grained_acl_list=fine_grained_acl_list)
274 else: 272 else:
275 key = Key(b) 273 self.upload_file(
276 key.name = remote_path 274 source_path=local_path, dest_bucket=b, dest_path=remote_path,
277 try: 275 **kwargs)
278 key.set_contents_from_filename(
279 filename=local_path, policy=predefined_acl)
280 except BotoServerError, e:
281 e.body = (repr(e.body) +
282 ' while uploading local_path=%s to bucket=%s, path=%s' % (
283 local_path, dest_bucket, remote_path))
284 raise
285 # TODO(epoger): This may be inefficient, because it calls
286 # _connect_to_bucket() for every file. Depending on how expensive that
287 # call is, we may want to optimize this.
288 for (id_type, id_value, permission) in fine_grained_acl_list or []:
289 self.set_acl(
290 bucket=dest_bucket, path=remote_path,
291 id_type=id_type, id_value=id_value, permission=permission)
292 276
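A small sketch (same hypothetical names as above) of the kwargs pass-through: any keyword argument accepted by upload_file(), such as upload_if or predefined_acl, applies to every file in the tree:

gs.upload_dir_contents(
    source_dir='/tmp/output',                      # illustrative local directory
    dest_bucket='example-bucket',                  # hypothetical bucket
    dest_dir='builds/1234',
    upload_if=GSUtils.UploadIf.IF_MODIFIED)        # forwarded to upload_file()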
293 def download_file(self, source_bucket, source_path, dest_path, 277 def download_file(self, source_bucket, source_path, dest_path,
294 create_subdirs_if_needed=False): 278 create_subdirs_if_needed=False):
295 """Downloads a single file from Google Cloud Storage to local disk. 279 """Downloads a single file from Google Cloud Storage to local disk.
296 280
297 Args: 281 Args:
298 source_bucket: GCS bucket to download the file from 282 source_bucket: GS bucket to download the file from
299 source_path: full path (Posix-style) within that bucket 283 source_path: full path (Posix-style) within that bucket
300 dest_path: full path (local-OS-style) on local disk to copy the file to 284 dest_path: full path (local-OS-style) on local disk to copy the file to
301 create_subdirs_if_needed: boolean; whether to create subdirectories as 285 create_subdirs_if_needed: boolean; whether to create subdirectories as
302 needed to create dest_path 286 needed to create dest_path
303 """ 287 """
304 b = self._connect_to_bucket(bucket_name=source_bucket) 288 b = self._connect_to_bucket(bucket=source_bucket)
305 key = Key(b) 289 key = Key(b)
306 key.name = source_path 290 key.name = source_path
307 if create_subdirs_if_needed: 291 if create_subdirs_if_needed:
308 _makedirs_if_needed(os.path.dirname(dest_path)) 292 _makedirs_if_needed(os.path.dirname(dest_path))
309 with open(dest_path, 'w') as f: 293 with open(dest_path, 'w') as f:
310 try: 294 try:
311 key.get_contents_to_file(fp=f) 295 key.get_contents_to_file(fp=f)
312 except BotoServerError, e: 296 except BotoServerError, e:
313 e.body = (repr(e.body) + 297 e.body = (repr(e.body) +
314 ' while downloading bucket=%s, path=%s to local_path=%s' % ( 298 ' while downloading bucket=%s, path=%s to local_path=%s' % (
315 source_bucket, source_path, dest_path)) 299 source_bucket, source_path, dest_path))
316 raise 300 raise
317 301
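And a hedged sketch of the download counterpart, again with made-up bucket and path names:

gs.download_file(
    source_bucket='example-bucket',                # hypothetical bucket
    source_path='builds/1234/render.png',
    dest_path='/tmp/downloads/render.png',
    create_subdirs_if_needed=True)                 # creates /tmp/downloads if absent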
318 def download_dir_contents(self, source_bucket, source_dir, dest_dir): 302 def download_dir_contents(self, source_bucket, source_dir, dest_dir):
319 """Recursively download contents of a Google Storage directory to local disk 303 """Recursively download contents of a Google Storage directory to local disk
320 304
321 params: 305 params:
322 source_bucket: GCS bucket to copy the files from 306 source_bucket: GS bucket to copy the files from
323 source_dir: full path (Posix-style) within that bucket; read the files 307 source_dir: full path (Posix-style) within that bucket; read the files
324 from this directory 308 from this directory
325 dest_dir: full path (local-OS-style) on local disk of directory to copy 309 dest_dir: full path (local-OS-style) on local disk of directory to copy
326 the files into 310 the files into
327 311
328 The copy operates as a "merge with overwrite": any files in source_dir will 312 The copy operates as a "merge with overwrite": any files in source_dir will
329 be "overlaid" on top of the existing content in dest_dir. Existing files 313 be "overlaid" on top of the existing content in dest_dir. Existing files
330 with the same names will be overwritten. 314 with the same names will be overwritten.
331 315
332 TODO(epoger): Download multiple files simultaneously to reduce latency. 316 TODO(epoger): Download multiple files simultaneously to reduce latency.
333 """ 317 """
334 _makedirs_if_needed(dest_dir) 318 _makedirs_if_needed(dest_dir)
335 b = self._connect_to_bucket(bucket_name=source_bucket) 319 b = self._connect_to_bucket(bucket=source_bucket)
336 (dirs, files) = self.list_bucket_contents( 320 (dirs, files) = self.list_bucket_contents(
337 bucket=source_bucket, subdir=source_dir) 321 bucket=source_bucket, subdir=source_dir)
338 322
339 for filename in files: 323 for filename in files:
340 key = Key(b) 324 key = Key(b)
341 key.name = posixpath.join(source_dir, filename) 325 key.name = posixpath.join(source_dir, filename)
342 dest_path = os.path.join(dest_dir, filename) 326 dest_path = os.path.join(dest_dir, filename)
343 with open(dest_path, 'w') as f: 327 with open(dest_path, 'w') as f:
344 try: 328 try:
345 key.get_contents_to_file(fp=f) 329 key.get_contents_to_file(fp=f)
(...skipping 25 matching lines...)
371 path: full path (Posix-style) to the file within that bucket 355 path: full path (Posix-style) to the file within that bucket
372 id_type: must be one of the IdType values defined above 356 id_type: must be one of the IdType values defined above
373 id_value: get permissions for users whose id_type field contains this 357 id_value: get permissions for users whose id_type field contains this
374 value 358 value
375 359
376 Returns: the Permission value which has been set for users matching 360 Returns: the Permission value which has been set for users matching
377 this id_type/id_value, on this file; or Permission.EMPTY if no such 361 this id_type/id_value, on this file; or Permission.EMPTY if no such
378 permissions have been set. 362 permissions have been set.
379 """ 363 """
380 field = self._field_by_id_type[id_type] 364 field = self._field_by_id_type[id_type]
381 b = self._connect_to_bucket(bucket_name=bucket) 365 b = self._connect_to_bucket(bucket=bucket)
382 acls = b.get_acl(key_name=path) 366 acls = b.get_acl(key_name=path)
383 matching_entries = [entry for entry in acls.entries.entry_list 367 matching_entries = [entry for entry in acls.entries.entry_list
384 if (entry.scope.type == id_type) and 368 if (entry.scope.type == id_type) and
385 (getattr(entry.scope, field) == id_value)] 369 (getattr(entry.scope, field) == id_value)]
386 if matching_entries: 370 if matching_entries:
387 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) 371 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries)
388 return matching_entries[0].permission 372 return matching_entries[0].permission
389 else: 373 else:
390 return self.Permission.EMPTY 374 return self.Permission.EMPTY
391 375
(...skipping 28 matching lines...)
420 bucket = 'gs://bucket-name' 404 bucket = 'gs://bucket-name'
421 path = 'path/to/file' 405 path = 'path/to/file'
422 id_type = IdType.USER_BY_EMAIL 406 id_type = IdType.USER_BY_EMAIL
423 id_value = 'epoger@google.com' 407 id_value = 'epoger@google.com'
424 set_acl(bucket, path, id_type, id_value, Permission.READ) 408 set_acl(bucket, path, id_type, id_value, Permission.READ)
425 assert Permission.READ == get_acl(bucket, path, id_type, id_value) 409 assert Permission.READ == get_acl(bucket, path, id_type, id_value)
426 set_acl(bucket, path, id_type, id_value, Permission.WRITE) 410 set_acl(bucket, path, id_type, id_value, Permission.WRITE)
427 assert Permission.WRITE == get_acl(bucket, path, id_type, id_value) 411 assert Permission.WRITE == get_acl(bucket, path, id_type, id_value)
428 """ 412 """
429 field = self._field_by_id_type[id_type] 413 field = self._field_by_id_type[id_type]
430 b = self._connect_to_bucket(bucket_name=bucket) 414 b = self._connect_to_bucket(bucket=bucket)
431 acls = b.get_acl(key_name=path) 415 acls = b.get_acl(key_name=path)
432 416
433 # Remove any existing entries that refer to the same id_type/id_value, 417 # Remove any existing entries that refer to the same id_type/id_value,
434 # because the API will fail if we try to set more than one. 418 # because the API will fail if we try to set more than one.
435 matching_entries = [entry for entry in acls.entries.entry_list 419 matching_entries = [entry for entry in acls.entries.entry_list
436 if (entry.scope.type == id_type) and 420 if (entry.scope.type == id_type) and
437 (getattr(entry.scope, field) == id_value)] 421 (getattr(entry.scope, field) == id_value)]
438 if matching_entries: 422 if matching_entries:
439 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) 423 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries)
440 acls.entries.entry_list.remove(matching_entries[0]) 424 acls.entries.entry_list.remove(matching_entries[0])
(...skipping 17 matching lines...)
458 Args: 442 Args:
459 bucket: name of the Google Storage bucket 443 bucket: name of the Google Storage bucket
460 subdir: directory within the bucket to list, or None for root directory 444 subdir: directory within the bucket to list, or None for root directory
461 """ 445 """
462 # The GS command relies on the prefix (if any) ending with a slash. 446 # The GS command relies on the prefix (if any) ending with a slash.
463 prefix = subdir or '' 447 prefix = subdir or ''
464 if prefix and not prefix.endswith('/'): 448 if prefix and not prefix.endswith('/'):
465 prefix += '/' 449 prefix += '/'
466 prefix_length = len(prefix) if prefix else 0 450 prefix_length = len(prefix) if prefix else 0
467 451
468 b = self._connect_to_bucket(bucket_name=bucket) 452 b = self._connect_to_bucket(bucket=bucket)
469 items = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') 453 items = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
470 dirs = [] 454 dirs = []
471 files = [] 455 files = []
472 for item in items: 456 for item in items:
473 t = type(item) 457 t = type(item)
474 if t is Key: 458 if t is Key:
475 files.append(item.name[prefix_length:]) 459 files.append(item.name[prefix_length:])
476 elif t is Prefix: 460 elif t is Prefix:
477 dirs.append(item.name[prefix_length:-1]) 461 dirs.append(item.name[prefix_length:-1])
478 return (dirs, files) 462 return (dirs, files)
479 463
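A usage sketch for list_bucket_contents(), assuming the same hypothetical bucket; names come back relative to the listed subdir, per the prefix stripping above:

dirs, files = gs.list_bucket_contents(bucket='example-bucket', subdir='builds')
print dirs   # subdirectories immediately under 'builds', e.g. ['1234', '1235']
print files  # files directly in 'builds', not in its subdirectories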
480 def _connect_to_bucket(self, bucket_name): 464 def _connect_to_bucket(self, bucket):
481 """Returns a Bucket object we can use to access a particular bucket in GS. 465 """Returns a Bucket object we can use to access a particular bucket in GS.
482 466
483 Params: 467 Params:
484 bucket_name: name of the bucket (e.g., 'chromium-skia-gm') 468 bucket: name of the bucket (e.g., 'chromium-skia-gm'), or a Bucket
469 object (in which case this param is just returned as-is)
485 """ 470 """
471 if type(bucket) is Bucket:
472 return bucket
486 try: 473 try:
487 return self._create_connection().get_bucket(bucket_name=bucket_name) 474 return self._create_connection().get_bucket(bucket_name=bucket)
488 except BotoServerError, e: 475 except BotoServerError, e:
489 e.body = repr(e.body) + ' while connecting to bucket=%s' % bucket_name 476 e.body = repr(e.body) + ' while connecting to bucket=%s' % bucket
490 raise 477 raise
491 478
492 def _create_connection(self): 479 def _create_connection(self):
493 """Returns a GSConnection object we can use to access Google Storage.""" 480 """Returns a GSConnection object we can use to access Google Storage."""
494 if self._gs_access_key_id: 481 if self._gs_access_key_id:
495 return GSConnection( 482 return GSConnection(
496 gs_access_key_id=self._gs_access_key_id, 483 gs_access_key_id=self._gs_access_key_id,
497 gs_secret_access_key=self._gs_secret_access_key) 484 gs_secret_access_key=self._gs_secret_access_key)
498 else: 485 else:
499 return AnonymousGSConnection() 486 return AnonymousGSConnection()
(...skipping 39 matching lines...)
539 526
540 def _get_local_md5(path): 527 def _get_local_md5(path):
541 """Returns the MD5 hash of a file on local disk.""" 528 """Returns the MD5 hash of a file on local disk."""
542 hasher = hashlib.md5() 529 hasher = hashlib.md5()
543 with open(path, 'rb') as f: 530 with open(path, 'rb') as f:
544 while True: 531 while True:
545 data = f.read(64*1024) 532 data = f.read(64*1024)
546 if not data: 533 if not data:
547 return hasher.hexdigest() 534 return hasher.hexdigest()
548 hasher.update(data) 535 hasher.update(data)