OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
4 """ | 4 """ |
5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
6 | 6 |
7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
8 found in the LICENSE file. | 8 found in the LICENSE file. |
9 | 9 |
10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
(...skipping 83 matching lines...) |
94 PUBLIC_READ_WRITE = 'public-read-write' | 94 PUBLIC_READ_WRITE = 'public-read-write' |
95 | 95 |
96 class IdType: | 96 class IdType: |
97 """Types of identifiers we can use to set "fine-grained" ACLs.""" | 97 """Types of identifiers we can use to set "fine-grained" ACLs.""" |
98 GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN | 98 GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN |
99 GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL | 99 GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL |
100 GROUP_BY_ID = acl.GROUP_BY_ID | 100 GROUP_BY_ID = acl.GROUP_BY_ID |
101 USER_BY_EMAIL = acl.USER_BY_EMAIL | 101 USER_BY_EMAIL = acl.USER_BY_EMAIL |
102 USER_BY_ID = acl.USER_BY_ID | 102 USER_BY_ID = acl.USER_BY_ID |
103 | 103 |
| 104 class UploadIf: |
| 105 """Cases in which we will upload a file. |
| 106 |
| 107 Beware of performance tradeoffs. E.g., if the file is small, the extra |
| 108 round trip to check for file existence and/or checksum may take longer than |
| 109 just uploading the file.""" |
| 110 ALWAYS = 1 # always upload the file |
| 111 IF_NEW = 2 # if there is an existing file with the same name, |
| 112 # leave it alone |
| 113 IF_MODIFIED = 3 # if there is an existing file with the same name and |
| 114 # contents, leave it alone |
104 | 115 |
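The UploadIf docstring's performance caveat suggests choosing the policy per call. A minimal sketch of one way to do that, assuming the module is importable as gs_utils; the helper name and the 64 KiB cutoff are illustrative assumptions, not measured values:

    import os
    from gs_utils import GSUtils  # module name assumed

    def choose_upload_policy(source_path):
      """Hypothetical helper: pick an UploadIf policy by file size."""
      # For a small file, the round trip to check existence/checksum may
      # cost more than simply re-uploading it (64 KiB cutoff is assumed).
      if os.path.getsize(source_path) < 64 * 1024:
        return GSUtils.UploadIf.ALWAYS
      return GSUtils.UploadIf.IF_MODIFIED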
105 def __init__(self, boto_file_path=None): | 116 def __init__(self, boto_file_path=None): |
106 """Constructor. | 117 """Constructor. |
107 | 118 |
108 Params: | 119 Params: |
109 boto_file_path: full path (local-OS-style) on local disk where .boto | 120 boto_file_path: full path (local-OS-style) on local disk where .boto |
110 credentials file can be found. If None, then the GSUtils object | 121 credentials file can be found. If None, then the GSUtils object |
111 created will be able to access only public files in Google Storage. | 122 created will be able to access only public files in Google Storage. |
112 | 123 |
113 Raises an exception if no file is found at boto_file_path, or if the file | 124 Raises an exception if no file is found at boto_file_path, or if the file |
(...skipping 18 matching lines...) |
132 def delete_file(self, bucket, path): | 143 def delete_file(self, bucket, path): |
133 """Delete a single file within a GS bucket. | 144 """Delete a single file within a GS bucket. |
134 | 145 |
135 TODO(epoger): what if bucket or path does not exist? Should probably raise | 146 TODO(epoger): what if bucket or path does not exist? Should probably raise |
136 an exception. Implement, and add a test to exercise this. | 147 an exception. Implement, and add a test to exercise this. |
137 | 148 |
138 Params: | 149 Params: |
139 bucket: GS bucket to delete a file from | 150 bucket: GS bucket to delete a file from |
140 path: full path (Posix-style) of the file within the bucket to delete | 151 path: full path (Posix-style) of the file within the bucket to delete |
141 """ | 152 """ |
142 b = self._connect_to_bucket(bucket_name=bucket) | 153 b = self._connect_to_bucket(bucket=bucket) |
143 key = Key(b) | 154 key = Key(b) |
144 key.name = path | 155 key.name = path |
145 try: | 156 try: |
146 key.delete() | 157 key.delete() |
147 except BotoServerError, e: | 158 except BotoServerError, e: |
148 e.body = (repr(e.body) + | 159 e.body = (repr(e.body) + |
149 ' while deleting bucket=%s, path=%s' % (bucket, path)) | 160 ' while deleting bucket=%s, path=%s' % (bucket, path)) |
150 raise | 161 raise |
151 | 162 |
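A minimal usage sketch for delete_file(). The module name and local path are assumptions for illustration; the bucket name is borrowed from the docstring example further down:

    from gs_utils import GSUtils  # module name assumed

    gs = GSUtils(boto_file_path='/home/user/.boto')  # hypothetical path
    gs.delete_file(bucket='chromium-skia-gm', path='path/to/file')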
152 def get_last_modified_time(self, bucket, path): | 163 def get_last_modified_time(self, bucket, path): |
153 """Gets the timestamp of when this file was last modified. | 164 """Gets the timestamp of when this file was last modified. |
154 | 165 |
155 Params: | 166 Params: |
156 bucket: GS bucket in which to look for the file | 167 bucket: GS bucket in which to look for the file |
157 path: full path (Posix-style) of the file within the bucket to check | 168 path: full path (Posix-style) of the file within the bucket to check |
158 | 169 |
159 Returns the last modified time, as a freeform string. If the file was not | 170 Returns the last modified time, as a freeform string. If the file was not |
160 found, returns None. | 171 found, returns None. |
161 """ | 172 """ |
162 b = self._connect_to_bucket(bucket_name=bucket) | 173 b = self._connect_to_bucket(bucket=bucket) |
163 try: | 174 try: |
164 key = b.get_key(key_name=path) | 175 key = b.get_key(key_name=path) |
165 if not key: | 176 if not key: |
166 return None | 177 return None |
167 return key.last_modified | 178 return key.last_modified |
168 except BotoServerError, e: | 179 except BotoServerError, e: |
169 e.body = (repr(e.body) + | 180 e.body = (repr(e.body) + |
170 ' while getting attributes of bucket=%s, path=%s' % ( | 181 ' while getting attributes of bucket=%s, path=%s' % ( |
171 bucket, path)) | 182 bucket, path)) |
172 raise | 183 raise |
173 | 184 |
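Because a missing file yields None rather than an exception, callers need an explicit check. A sketch, reusing the gs handle from the previous sketch:

    timestamp = gs.get_last_modified_time(
        bucket='chromium-skia-gm', path='path/to/file')
    if timestamp is None:
      print 'file not found'
    else:
      print 'last modified: %s' % timestamp  # freeform string from GS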
174 def upload_file(self, source_path, dest_bucket, dest_path, | 185 def upload_file(self, source_path, dest_bucket, dest_path, |
175 only_if_modified=False, predefined_acl=None, | 186 upload_if=UploadIf.ALWAYS, |
| 187 predefined_acl=None, |
176 fine_grained_acl_list=None): | 188 fine_grained_acl_list=None): |
177 """Upload contents of a local file to Google Storage. | 189 """Upload contents of a local file to Google Storage. |
178 | 190 |
179 params: | 191 params: |
180 source_path: full path (local-OS-style) on local disk to read from | 192 source_path: full path (local-OS-style) on local disk to read from |
181 dest_bucket: GCS bucket to copy the file to | 193 dest_bucket: GS bucket to copy the file to |
182 dest_path: full path (Posix-style) within that bucket | 194 dest_path: full path (Posix-style) within that bucket |
183 only_if_modified: if True, only upload the file if it would actually | 195 upload_if: one of the UploadIf values, indicating in which cases the |
184 change the content on Google Storage (uploads the file if dest_path | 196 file should be uploaded |
185 does not exist, or if it exists but has different contents than | |
186 source_path). Note that this may take longer than just uploading the | |
187 file without checking first, due to extra round-trips! | |
188 predefined_acl: which predefined ACL to apply to the file on Google | 197 predefined_acl: which predefined ACL to apply to the file on Google |
189 Storage; must be one of the PredefinedACL values defined above. | 198 Storage; must be one of the PredefinedACL values defined above. |
190 If None, inherits dest_bucket's default object ACL. | 199 If None, inherits dest_bucket's default object ACL. |
191 TODO(epoger): add unittests for this param, although it seems to work | |
192 in my manual testing | |
193 fine_grained_acl_list: list of (id_type, id_value, permission) tuples | 200 fine_grained_acl_list: list of (id_type, id_value, permission) tuples |
194 to apply to the uploaded file (on top of the predefined_acl), | 201 to apply to the uploaded file (on top of the predefined_acl), |
195 or None if predefined_acl is sufficient | 202 or None if predefined_acl is sufficient |
| 203 |
| 204 TODO(epoger): Consider adding a do_compress parameter that would compress |
| 205 the file using gzip before upload, and add a "Content-Encoding:gzip" header |
| 206 so that HTTP downloads of the file would be unzipped automatically. |
| 207 See https://developers.google.com/storage/docs/gsutil/addlhelp/ |
| 208 WorkingWithObjectMetadata#content-encoding |
196 """ | 209 """ |
197 b = self._connect_to_bucket(bucket_name=dest_bucket) | 210 b = self._connect_to_bucket(bucket=dest_bucket) |
198 | 211 |
199 if only_if_modified: | 212 if upload_if == self.UploadIf.IF_NEW: |
| 213 old_key = b.get_key(key_name=dest_path) |
| 214 if old_key: |
| 215 print 'Skipping upload of existing file gs://%s/%s' % ( |
| 216 dest_bucket, dest_path) |
| 217 return |
| 218 elif upload_if == self.UploadIf.IF_MODIFIED: |
200 old_key = b.get_key(key_name=dest_path) | 219 old_key = b.get_key(key_name=dest_path) |
201 if old_key: | 220 if old_key: |
202 local_md5 = '"%s"' % _get_local_md5(path=source_path) | 221 local_md5 = '"%s"' % _get_local_md5(path=source_path) |
203 if local_md5 == old_key.etag: | 222 if local_md5 == old_key.etag: |
204 print 'Skipping upload of unmodified file %s : %s' % ( | 223 print 'Skipping upload of unmodified file gs://%s/%s : %s' % ( |
205 source_path, local_md5) | 224 dest_bucket, dest_path, local_md5) |
206 return | 225 return |
| 226 elif upload_if != self.UploadIf.ALWAYS: |
| 227 raise Exception('unknown value of upload_if: %s' % upload_if) |
207 | 228 |
208 key = Key(b) | 229 key = Key(b) |
209 key.name = dest_path | 230 key.name = dest_path |
210 try: | 231 try: |
211 key.set_contents_from_filename(filename=source_path, | 232 key.set_contents_from_filename(filename=source_path, |
212 policy=predefined_acl) | 233 policy=predefined_acl) |
213 except BotoServerError, e: | 234 except BotoServerError, e: |
214 e.body = (repr(e.body) + | 235 e.body = (repr(e.body) + |
215 ' while uploading source_path=%s to bucket=%s, path=%s' % ( | 236 ' while uploading source_path=%s to bucket=%s, path=%s' % ( |
216 source_path, dest_bucket, key.name)) | 237 source_path, dest_bucket, key.name)) |
217 raise | 238 raise |
218 # TODO(epoger): This may be inefficient, because it calls | |
219 # _connect_to_bucket() again. Depending on how expensive that | |
220 # call is, we may want to optimize this. | |
221 for (id_type, id_value, permission) in fine_grained_acl_list or []: | 239 for (id_type, id_value, permission) in fine_grained_acl_list or []: |
222 self.set_acl( | 240 self.set_acl( |
223 bucket=dest_bucket, path=key.name, | 241 bucket=b, path=key.name, |
224 id_type=id_type, id_value=id_value, permission=permission) | 242 id_type=id_type, id_value=id_value, permission=permission) |
225 | 243 |
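A sketch tying the new upload_file() parameters together, reusing the gs handle from above. Paths are hypothetical; predefined_acl is omitted, so the file inherits the bucket's default object ACL:

    gs.upload_file(
        source_path='/tmp/report.json',           # hypothetical local file
        dest_bucket='chromium-skia-gm',
        dest_path='reports/report.json',
        upload_if=GSUtils.UploadIf.IF_MODIFIED,   # skip if MD5 matches etag
        fine_grained_acl_list=[
            (GSUtils.IdType.USER_BY_EMAIL, 'epoger@google.com',
             GSUtils.Permission.READ),
        ])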
226 def upload_dir_contents(self, source_dir, dest_bucket, dest_dir, | 244 def upload_dir_contents(self, source_dir, dest_bucket, dest_dir, **kwargs): |
227 predefined_acl=None, fine_grained_acl_list=None): | |
228 """Recursively upload contents of a local directory to Google Storage. | 245 """Recursively upload contents of a local directory to Google Storage. |
229 | 246 |
230 params: | 247 params: |
231 source_dir: full path (local-OS-style) on local disk of directory to copy | 248 source_dir: full path (local-OS-style) on local disk of directory to copy |
232 contents of | 249 contents of |
233 dest_bucket: GCS bucket to copy the files into | 250 dest_bucket: GS bucket to copy the files into |
234 dest_dir: full path (Posix-style) within that bucket; write the files into | 251 dest_dir: full path (Posix-style) within that bucket; write the files into |
235 this directory. If None, write into the root directory of the bucket. | 252 this directory. If None, write into the root directory of the bucket. |
236 predefined_acl: which predefined ACL to apply to the files on Google | 253 kwargs: any additional keyword arguments "inherited" from upload_file() |
237 Storage; must be one of the PredefinedACL values defined above. | |
238 If None, inherits dest_bucket's default object ACL. | |
239 TODO(epoger): add unittests for this param, although it seems to work | |
240 in my manual testing | |
241 fine_grained_acl_list: list of (id_type, id_value, permission) tuples | |
242 to apply to every file uploaded (on top of the predefined_acl), | |
243 or None if predefined_acl is sufficient | |
244 | 254 |
245 The copy operates as a "merge with overwrite": any files in source_dir will | 255 The copy operates as a merge: any files in source_dir will be "overlaid" on |
246 be "overlaid" on top of the existing content in dest_dir. Existing files | 256 top of the existing content in dest_dir. Existing files with the same names |
247 with the same names will be overwritten. | 257 may or may not be overwritten, depending on the value of the upload_if kwarg |
| 258 inherited from upload_file(). |
248 | 259 |
249 TODO(epoger): Upload multiple files simultaneously to reduce latency. | 260 TODO(epoger): Upload multiple files simultaneously to reduce latency. |
250 | |
251 TODO(epoger): Add a "noclobber" mode that will not upload any files would | |
252 overwrite existing files in Google Storage. | |
253 | |
254 TODO(epoger): Consider adding a do_compress parameter that would compress | |
255 the file using gzip before upload, and add a "Content-Encoding:gzip" header | |
256 so that HTTP downloads of the file would be unzipped automatically. | |
257 See https://developers.google.com/storage/docs/gsutil/addlhelp/ | |
258 WorkingWithObjectMetadata#content-encoding | |
259 """ | 261 """ |
260 b = self._connect_to_bucket(bucket_name=dest_bucket) | 262 b = self._connect_to_bucket(bucket=dest_bucket) |
261 for filename in sorted(os.listdir(source_dir)): | 263 for filename in sorted(os.listdir(source_dir)): |
262 local_path = os.path.join(source_dir, filename) | 264 local_path = os.path.join(source_dir, filename) |
263 if dest_dir: | 265 if dest_dir: |
264 remote_path = posixpath.join(dest_dir, filename) | 266 remote_path = posixpath.join(dest_dir, filename) |
265 else: | 267 else: |
266 remote_path = filename | 268 remote_path = filename |
267 | 269 |
268 if os.path.isdir(local_path): | 270 if os.path.isdir(local_path): |
269 self.upload_dir_contents( # recurse | 271 self.upload_dir_contents( # recurse |
270 source_dir=local_path, dest_bucket=dest_bucket, | 272 source_dir=local_path, dest_bucket=b, dest_dir=remote_path, |
271 dest_dir=remote_path, | 273 **kwargs) |
272 predefined_acl=predefined_acl, | |
273 fine_grained_acl_list=fine_grained_acl_list) | |
274 else: | 274 else: |
275 key = Key(b) | 275 self.upload_file( |
276 key.name = remote_path | 276 source_path=local_path, dest_bucket=b, dest_path=remote_path, |
277 try: | 277 **kwargs) |
278 key.set_contents_from_filename( | |
279 filename=local_path, policy=predefined_acl) | |
280 except BotoServerError, e: | |
281 e.body = (repr(e.body) + | |
282 ' while uploading local_path=%s to bucket=%s, path=%s' % ( | |
283 local_path, dest_bucket, remote_path)) | |
284 raise | |
285 # TODO(epoger): This may be inefficient, because it calls | |
286 # _connect_to_bucket() for every file. Depending on how expensive that | |
287 # call is, we may want to optimize this. | |
288 for (id_type, id_value, permission) in fine_grained_acl_list or []: | |
289 self.set_acl( | |
290 bucket=dest_bucket, path=remote_path, | |
291 id_type=id_type, id_value=id_value, permission=permission) | |
292 | 278 |
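Since upload_dir_contents() now forwards **kwargs to upload_file(), a single UploadIf policy applies to every file in the tree. A sketch with hypothetical paths:

    gs.upload_dir_contents(
        source_dir='/tmp/results',          # hypothetical local directory
        dest_bucket='chromium-skia-gm',
        dest_dir='results',
        upload_if=GSUtils.UploadIf.IF_NEW)  # never overwrite existing files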
293 def download_file(self, source_bucket, source_path, dest_path, | 279 def download_file(self, source_bucket, source_path, dest_path, |
294 create_subdirs_if_needed=False): | 280 create_subdirs_if_needed=False): |
295 """Downloads a single file from Google Cloud Storage to local disk. | 281 """Downloads a single file from Google Cloud Storage to local disk. |
296 | 282 |
297 Args: | 283 Args: |
298 source_bucket: GCS bucket to download the file from | 284 source_bucket: GS bucket to download the file from |
299 source_path: full path (Posix-style) within that bucket | 285 source_path: full path (Posix-style) within that bucket |
300 dest_path: full path (local-OS-style) on local disk to copy the file to | 286 dest_path: full path (local-OS-style) on local disk to copy the file to |
301 create_subdirs_if_needed: boolean; whether to create subdirectories as | 287 create_subdirs_if_needed: boolean; whether to create subdirectories as |
302 needed to create dest_path | 288 needed to create dest_path |
303 """ | 289 """ |
304 b = self._connect_to_bucket(bucket_name=source_bucket) | 290 b = self._connect_to_bucket(bucket=source_bucket) |
305 key = Key(b) | 291 key = Key(b) |
306 key.name = source_path | 292 key.name = source_path |
307 if create_subdirs_if_needed: | 293 if create_subdirs_if_needed: |
308 _makedirs_if_needed(os.path.dirname(dest_path)) | 294 _makedirs_if_needed(os.path.dirname(dest_path)) |
309 with open(dest_path, 'w') as f: | 295 with open(dest_path, 'w') as f: |
310 try: | 296 try: |
311 key.get_contents_to_file(fp=f) | 297 key.get_contents_to_file(fp=f) |
312 except BotoServerError, e: | 298 except BotoServerError, e: |
313 e.body = (repr(e.body) + | 299 e.body = (repr(e.body) + |
314 ' while downloading bucket=%s, path=%s to local_path=%s' % ( | 300 ' while downloading bucket=%s, path=%s to local_path=%s' % ( |
315 source_bucket, source_path, dest_path)) | 301 source_bucket, source_path, dest_path)) |
316 raise | 302 raise |
317 | 303 |
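A usage sketch for download_file(), with hypothetical paths:

    gs.download_file(
        source_bucket='chromium-skia-gm',
        source_path='reports/report.json',
        dest_path='/tmp/reports/report.json',  # hypothetical local path
        create_subdirs_if_needed=True)         # creates /tmp/reports if absent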
318 def download_dir_contents(self, source_bucket, source_dir, dest_dir): | 304 def download_dir_contents(self, source_bucket, source_dir, dest_dir): |
319 """Recursively download contents of a Google Storage directory to local disk | 305 """Recursively download contents of a Google Storage directory to local disk |
320 | 306 |
321 params: | 307 params: |
322 source_bucket: GCS bucket to copy the files from | 308 source_bucket: GS bucket to copy the files from |
323 source_dir: full path (Posix-style) within that bucket; read the files | 309 source_dir: full path (Posix-style) within that bucket; read the files |
324 from this directory | 310 from this directory |
325 dest_dir: full path (local-OS-style) on local disk of directory to copy | 311 dest_dir: full path (local-OS-style) on local disk of directory to copy |
326 the files into | 312 the files into |
327 | 313 |
328 The copy operates as a "merge with overwrite": any files in source_dir will | 314 The copy operates as a "merge with overwrite": any files in source_dir will |
329 be "overlaid" on top of the existing content in dest_dir. Existing files | 315 be "overlaid" on top of the existing content in dest_dir. Existing files |
330 with the same names will be overwritten. | 316 with the same names will be overwritten. |
331 | 317 |
332 TODO(epoger): Download multiple files simultaneously to reduce latency. | 318 TODO(epoger): Download multiple files simultaneously to reduce latency. |
333 """ | 319 """ |
334 _makedirs_if_needed(dest_dir) | 320 _makedirs_if_needed(dest_dir) |
335 b = self._connect_to_bucket(bucket_name=source_bucket) | 321 b = self._connect_to_bucket(bucket=source_bucket) |
336 (dirs, files) = self.list_bucket_contents( | 322 (dirs, files) = self.list_bucket_contents( |
337 bucket=source_bucket, subdir=source_dir) | 323 bucket=source_bucket, subdir=source_dir) |
338 | 324 |
339 for filename in files: | 325 for filename in files: |
340 key = Key(b) | 326 key = Key(b) |
341 key.name = posixpath.join(source_dir, filename) | 327 key.name = posixpath.join(source_dir, filename) |
342 dest_path = os.path.join(dest_dir, filename) | 328 dest_path = os.path.join(dest_dir, filename) |
343 with open(dest_path, 'w') as f: | 329 with open(dest_path, 'w') as f: |
344 try: | 330 try: |
345 key.get_contents_to_file(fp=f) | 331 key.get_contents_to_file(fp=f) |
(...skipping 25 matching lines...) |
371 path: full path (Posix-style) to the file within that bucket | 357 path: full path (Posix-style) to the file within that bucket |
372 id_type: must be one of the IdType values defined above | 358 id_type: must be one of the IdType values defined above |
373 id_value: get permissions for users whose id_type field contains this | 359 id_value: get permissions for users whose id_type field contains this |
374 value | 360 value |
375 | 361 |
376 Returns: the Permission value which has been set for users matching | 362 Returns: the Permission value which has been set for users matching |
377 this id_type/id_value, on this file; or Permission.EMPTY if no such | 363 this id_type/id_value, on this file; or Permission.EMPTY if no such |
378 permissions have been set. | 364 permissions have been set. |
379 """ | 365 """ |
380 field = self._field_by_id_type[id_type] | 366 field = self._field_by_id_type[id_type] |
381 b = self._connect_to_bucket(bucket_name=bucket) | 367 b = self._connect_to_bucket(bucket=bucket) |
382 acls = b.get_acl(key_name=path) | 368 acls = b.get_acl(key_name=path) |
383 matching_entries = [entry for entry in acls.entries.entry_list | 369 matching_entries = [entry for entry in acls.entries.entry_list |
384 if (entry.scope.type == id_type) and | 370 if (entry.scope.type == id_type) and |
385 (getattr(entry.scope, field) == id_value)] | 371 (getattr(entry.scope, field) == id_value)] |
386 if matching_entries: | 372 if matching_entries: |
387 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) | 373 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) |
388 return matching_entries[0].permission | 374 return matching_entries[0].permission |
389 else: | 375 else: |
390 return self.Permission.EMPTY | 376 return self.Permission.EMPTY |
391 | 377 |
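A sketch of querying a fine-grained ACL and handling the Permission.EMPTY sentinel; identifiers are borrowed from the set_acl() docstring example below:

    permission = gs.get_acl(
        bucket='chromium-skia-gm', path='path/to/file',
        id_type=GSUtils.IdType.USER_BY_EMAIL, id_value='epoger@google.com')
    if permission == GSUtils.Permission.EMPTY:
      print 'no fine-grained ACL set for this user'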
(...skipping 28 matching lines...) |
420 bucket = 'gs://bucket-name' | 406 bucket = 'gs://bucket-name' |
421 path = 'path/to/file' | 407 path = 'path/to/file' |
422 id_type = IdType.USER_BY_EMAIL | 408 id_type = IdType.USER_BY_EMAIL |
423 id_value = 'epoger@google.com' | 409 id_value = 'epoger@google.com' |
424 set_acl(bucket, path, id_type, id_value, Permission.READ) | 410 set_acl(bucket, path, id_type, id_value, Permission.READ) |
425 assert Permission.READ == get_acl(bucket, path, id_type, id_value) | 411 assert Permission.READ == get_acl(bucket, path, id_type, id_value) |
426 set_acl(bucket, path, id_type, id_value, Permission.WRITE) | 412 set_acl(bucket, path, id_type, id_value, Permission.WRITE) |
427 assert Permission.WRITE == get_acl(bucket, path, id_type, id_value) | 413 assert Permission.WRITE == get_acl(bucket, path, id_type, id_value) |
428 """ | 414 """ |
429 field = self._field_by_id_type[id_type] | 415 field = self._field_by_id_type[id_type] |
430 b = self._connect_to_bucket(bucket_name=bucket) | 416 b = self._connect_to_bucket(bucket=bucket) |
431 acls = b.get_acl(key_name=path) | 417 acls = b.get_acl(key_name=path) |
432 | 418 |
433 # Remove any existing entries that refer to the same id_type/id_value, | 419 # Remove any existing entries that refer to the same id_type/id_value, |
434 # because the API will fail if we try to set more than one. | 420 # because the API will fail if we try to set more than one. |
435 matching_entries = [entry for entry in acls.entries.entry_list | 421 matching_entries = [entry for entry in acls.entries.entry_list |
436 if (entry.scope.type == id_type) and | 422 if (entry.scope.type == id_type) and |
437 (getattr(entry.scope, field) == id_value)] | 423 (getattr(entry.scope, field) == id_value)] |
438 if matching_entries: | 424 if matching_entries: |
439 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) | 425 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) |
440 acls.entries.entry_list.remove(matching_entries[0]) | 426 acls.entries.entry_list.remove(matching_entries[0]) |
(...skipping 17 matching lines...) |
458 Args: | 444 Args: |
459 bucket: name of the Google Storage bucket | 445 bucket: name of the Google Storage bucket |
460 subdir: directory within the bucket to list, or None for root directory | 446 subdir: directory within the bucket to list, or None for root directory |
461 """ | 447 """ |
462 # The GS command relies on the prefix (if any) ending with a slash. | 448 # The GS command relies on the prefix (if any) ending with a slash. |
463 prefix = subdir or '' | 449 prefix = subdir or '' |
464 if prefix and not prefix.endswith('/'): | 450 if prefix and not prefix.endswith('/'): |
465 prefix += '/' | 451 prefix += '/' |
466 prefix_length = len(prefix) if prefix else 0 | 452 prefix_length = len(prefix) if prefix else 0 |
467 | 453 |
468 b = self._connect_to_bucket(bucket_name=bucket) | 454 b = self._connect_to_bucket(bucket=bucket) |
469 items = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') | 455 items = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') |
470 dirs = [] | 456 dirs = [] |
471 files = [] | 457 files = [] |
472 for item in items: | 458 for item in items: |
473 t = type(item) | 459 t = type(item) |
474 if t is Key: | 460 if t is Key: |
475 files.append(item.name[prefix_length:]) | 461 files.append(item.name[prefix_length:]) |
476 elif t is Prefix: | 462 elif t is Prefix: |
477 dirs.append(item.name[prefix_length:-1]) | 463 dirs.append(item.name[prefix_length:-1]) |
478 return (dirs, files) | 464 return (dirs, files) |
479 | 465 |
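A sketch of consuming the (dirs, files) tuple; note that returned names have the subdir prefix stripped, and directory names lose their trailing slash:

    (dirs, files) = gs.list_bucket_contents(
        bucket='chromium-skia-gm', subdir='results')
    for d in dirs:
      print 'subdirectory: %s' % d
    for f in files:
      print 'file: %s' % f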
480 def _connect_to_bucket(self, bucket_name): | 466 def _connect_to_bucket(self, bucket): |
481 """Returns a Bucket object we can use to access a particular bucket in GS. | 467 """Returns a Bucket object we can use to access a particular bucket in GS. |
482 | 468 |
483 Params: | 469 Params: |
484 bucket_name: name of the bucket (e.g., 'chromium-skia-gm') | 470 bucket: name of the bucket (e.g., 'chromium-skia-gm'), or a Bucket |
| 471 object (in which case this param is just returned as-is) |
485 """ | 472 """ |
| 473 if type(bucket) is Bucket: |
| 474 return bucket |
486 try: | 475 try: |
487 return self._create_connection().get_bucket(bucket_name=bucket_name) | 476 return self._create_connection().get_bucket(bucket_name=bucket) |
488 except BotoServerError, e: | 477 except BotoServerError, e: |
489 e.body = repr(e.body) + ' while connecting to bucket=%s' % bucket_name | 478 e.body = repr(e.body) + ' while connecting to bucket=%s' % bucket |
490 raise | 479 raise |
491 | 480 |
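The bucket-or-Bucket polymorphism above is what lets upload_dir_contents() connect once and hand the Bucket object down to upload_file() and set_acl(). A sketch of the pass-through behavior (an internal method, shown for illustration only):

    b = gs._connect_to_bucket(bucket='chromium-skia-gm')  # one connection
    assert gs._connect_to_bucket(bucket=b) is b           # returned as-is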
492 def _create_connection(self): | 481 def _create_connection(self): |
493 """Returns a GSConnection object we can use to access Google Storage.""" | 482 """Returns a GSConnection object we can use to access Google Storage.""" |
494 if self._gs_access_key_id: | 483 if self._gs_access_key_id: |
495 return GSConnection( | 484 return GSConnection( |
496 gs_access_key_id=self._gs_access_key_id, | 485 gs_access_key_id=self._gs_access_key_id, |
497 gs_secret_access_key=self._gs_secret_access_key) | 486 gs_secret_access_key=self._gs_secret_access_key) |
498 else: | 487 else: |
499 return AnonymousGSConnection() | 488 return AnonymousGSConnection() |
(...skipping 39 matching lines...) |
539 | 528 |
540 def _get_local_md5(path): | 529 def _get_local_md5(path): |
541 """Returns the MD5 hash of a file on local disk.""" | 530 """Returns the MD5 hash of a file on local disk.""" |
542 hasher = hashlib.md5() | 531 hasher = hashlib.md5() |
543 with open(path, 'rb') as f: | 532 with open(path, 'rb') as f: |
544 while True: | 533 while True: |
545 data = f.read(64*1024) | 534 data = f.read(64*1024) |
546 if not data: | 535 if not data: |
547 return hasher.hexdigest() | 536 return hasher.hexdigest() |
548 hasher.update(data) | 537 hasher.update(data) |
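For reference, the UploadIf.IF_MODIFIED branch of upload_file() wraps this hex digest in double quotes before comparing it with the key's etag. A standalone sketch of that check, given a Bucket object b and hypothetical paths:

    local_md5 = '"%s"' % _get_local_md5(path='/tmp/report.json')
    old_key = b.get_key(key_name='reports/report.json')
    if old_key and local_md5 == old_key.etag:
      print 'contents unchanged; upload can be skipped'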