| OLD | NEW |
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 | 2 |
| 3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
| 4 """ | 4 """ |
| 5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
| 6 | 6 |
| 7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
| 8 found in the LICENSE file. | 8 found in the LICENSE file. |
| 9 | 9 |
| 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
| (...skipping 83 matching lines...) |
| 94 PUBLIC_READ_WRITE = 'public-read-write' | 94 PUBLIC_READ_WRITE = 'public-read-write' |
| 95 | 95 |
| 96 class IdType: | 96 class IdType: |
| 97 """Types of identifiers we can use to set "fine-grained" ACLs.""" | 97 """Types of identifiers we can use to set "fine-grained" ACLs.""" |
| 98 GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN | 98 GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN |
| 99 GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL | 99 GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL |
| 100 GROUP_BY_ID = acl.GROUP_BY_ID | 100 GROUP_BY_ID = acl.GROUP_BY_ID |
| 101 USER_BY_EMAIL = acl.USER_BY_EMAIL | 101 USER_BY_EMAIL = acl.USER_BY_EMAIL |
| 102 USER_BY_ID = acl.USER_BY_ID | 102 USER_BY_ID = acl.USER_BY_ID |
| 103 | 103 |
| 104 class UploadIf: |
| 105 """Cases in which we will upload a file. |
| 106 |
| 107 Beware of performance tradeoffs. E.g., if the file is small, the extra |
| 108 round trip to check for file existence and/or checksum may take longer than |
| 109 just uploading the file.""" |
| 110 ALWAYS = 1 # always upload the file |
| 111 IF_NEW = 2 # if there is an existing file with the same name, |
| 112 # leave it alone |
| 113 IF_MODIFIED = 3 # if there is an existing file with the same name and |
| 114 # contents, leave it alone |
| 104 | 115 |
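As a rough illustration of the tradeoff described above (a sketch only: the enclosing class is assumed to be named GSUtils, as elsewhere in this module, and the bucket and paths are placeholders), a caller of the upload_file() method below might pick values like this:

  gs = GSUtils(boto_file_path='/path/to/.boto')  # hypothetical credentials file
  # Large, rarely-changing file: the extra existence/checksum round trip is cheap
  # relative to re-uploading the whole file.
  gs.upload_file(source_path='big-archive.zip', dest_bucket='my-bucket',
                 dest_path='archives/big-archive.zip',
                 upload_if=GSUtils.UploadIf.IF_MODIFIED)
  # Small file: just re-upload it; the round trip would likely cost more.
  gs.upload_file(source_path='notes.txt', dest_bucket='my-bucket',
                 dest_path='notes/notes.txt',
                 upload_if=GSUtils.UploadIf.ALWAYS)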
| 105 def __init__(self, boto_file_path=None): | 116 def __init__(self, boto_file_path=None): |
| 106 """Constructor. | 117 """Constructor. |
| 107 | 118 |
| 108 Params: | 119 Params: |
| 109 boto_file_path: full path (local-OS-style) on local disk where .boto | 120 boto_file_path: full path (local-OS-style) on local disk where .boto |
| 110 credentials file can be found. If None, then the GSUtils object | 121 credentials file can be found. If None, then the GSUtils object |
| 111 created will be able to access only public files in Google Storage. | 122 created will be able to access only public files in Google Storage. |
| 112 | 123 |
| 113 Raises an exception if no file is found at boto_file_path, or if the file | 124 Raises an exception if no file is found at boto_file_path, or if the file |
| (...skipping 18 matching lines...) |
| 132 def delete_file(self, bucket, path): | 143 def delete_file(self, bucket, path): |
| 133 """Delete a single file within a GS bucket. | 144 """Delete a single file within a GS bucket. |
| 134 | 145 |
| 135 TODO(epoger): what if bucket or path does not exist? Should probably raise | 146 TODO(epoger): what if bucket or path does not exist? Should probably raise |
| 136 an exception. Implement, and add a test to exercise this. | 147 an exception. Implement, and add a test to exercise this. |
| 137 | 148 |
| 138 Params: | 149 Params: |
| 139 bucket: GS bucket to delete a file from | 150 bucket: GS bucket to delete a file from |
| 140 path: full path (Posix-style) of the file within the bucket to delete | 151 path: full path (Posix-style) of the file within the bucket to delete |
| 141 """ | 152 """ |
| 142 b = self._connect_to_bucket(bucket_name=bucket) | 153 b = self._connect_to_bucket(bucket=bucket) |
| 143 key = Key(b) | 154 key = Key(b) |
| 144 key.name = path | 155 key.name = path |
| 145 try: | 156 try: |
| 146 key.delete() | 157 key.delete() |
| 147 except BotoServerError, e: | 158 except BotoServerError, e: |
| 148 e.body = (repr(e.body) + | 159 e.body = (repr(e.body) + |
| 149 ' while deleting bucket=%s, path=%s' % (bucket, path)) | 160 ' while deleting bucket=%s, path=%s' % (bucket, path)) |
| 150 raise | 161 raise |
| 151 | 162 |
| 152 def get_last_modified_time(self, bucket, path): | 163 def get_last_modified_time(self, bucket, path): |
| 153 """Gets the timestamp of when this file was last modified. | 164 """Gets the timestamp of when this file was last modified. |
| 154 | 165 |
| 155 Params: | 166 Params: |
| 156 bucket: GS bucket in which to look for the file | 167 bucket: GS bucket in which to look for the file |
| 157 path: full path (Posix-style) of the file within the bucket to check | 168 path: full path (Posix-style) of the file within the bucket to check |
| 158 | 169 |
| 159 Returns the last modified time, as a freeform string. If the file was not | 170 Returns the last modified time, as a freeform string. If the file was not |
| 160 found, returns None. | 171 found, returns None. |
| 161 """ | 172 """ |
| 162 b = self._connect_to_bucket(bucket_name=bucket) | 173 b = self._connect_to_bucket(bucket=bucket) |
| 163 try: | 174 try: |
| 164 key = b.get_key(key_name=path) | 175 key = b.get_key(key_name=path) |
| 165 if not key: | 176 if not key: |
| 166 return None | 177 return None |
| 167 return key.last_modified | 178 return key.last_modified |
| 168 except BotoServerError, e: | 179 except BotoServerError, e: |
| 169 e.body = (repr(e.body) + | 180 e.body = (repr(e.body) + |
| 170 ' while getting attributes of bucket=%s, path=%s' % ( | 181 ' while getting attributes of bucket=%s, path=%s' % ( |
| 171 bucket, path)) | 182 bucket, path)) |
| 172 raise | 183 raise |
| 173 | 184 |
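A minimal sketch of the two single-file helpers above, reusing the gs instance and placeholder names from the earlier sketch; get_last_modified_time() returns None when the file is absent:

  ts = gs.get_last_modified_time(bucket='my-bucket', path='archives/big-archive.zip')
  if ts:
      print 'last modified: %s' % ts
      gs.delete_file(bucket='my-bucket', path='archives/big-archive.zip')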
| 174 def upload_file(self, source_path, dest_bucket, dest_path, | 185 def upload_file(self, source_path, dest_bucket, dest_path, |
| 175 only_if_modified=False, predefined_acl=None, | 186 upload_if=UploadIf.ALWAYS, |
| 187 predefined_acl=None, |
| 176 fine_grained_acl_list=None): | 188 fine_grained_acl_list=None): |
| 177 """Upload contents of a local file to Google Storage. | 189 """Upload contents of a local file to Google Storage. |
| 178 | 190 |
| 179 params: | 191 params: |
| 180 source_path: full path (local-OS-style) on local disk to read from | 192 source_path: full path (local-OS-style) on local disk to read from |
| 181 dest_bucket: GCS bucket to copy the file to | 193 dest_bucket: GS bucket to copy the file to |
| 182 dest_path: full path (Posix-style) within that bucket | 194 dest_path: full path (Posix-style) within that bucket |
| 183 only_if_modified: if True, only upload the file if it would actually | 195 upload_if: one of the UploadIf values, describing in which cases we should |
| 184 change the content on Google Storage (uploads the file if dest_path | 196 upload the file |
| 185 does not exist, or if it exists but has different contents than | |
| 186 source_path). Note that this may take longer than just uploading the | |
| 187 file without checking first, due to extra round-trips! | |
| 188 predefined_acl: which predefined ACL to apply to the file on Google | 197 predefined_acl: which predefined ACL to apply to the file on Google |
| 189 Storage; must be one of the PredefinedACL values defined above. | 198 Storage; must be one of the PredefinedACL values defined above. |
| 190 If None, inherits dest_bucket's default object ACL. | 199 If None, inherits dest_bucket's default object ACL. |
| 191 TODO(epoger): add unittests for this param, although it seems to work | |
| 192 in my manual testing | |
| 193 fine_grained_acl_list: list of (id_type, id_value, permission) tuples | 200 fine_grained_acl_list: list of (id_type, id_value, permission) tuples |
| 194 to apply to the uploaded file (on top of the predefined_acl), | 201 to apply to the uploaded file (on top of the predefined_acl), |
| 195 or None if predefined_acl is sufficient | 202 or None if predefined_acl is sufficient |
| 203 |
| 204 TODO(epoger): Consider adding a do_compress parameter that would compress |
| 205 the file using gzip before upload, and add a "Content-Encoding:gzip" header |
| 206 so that HTTP downloads of the file would be unzipped automatically. |
| 207 See https://developers.google.com/storage/docs/gsutil/addlhelp/ |
| 208 WorkingWithObjectMetadata#content-encoding |
| 196 """ | 209 """ |
| 197 b = self._connect_to_bucket(bucket_name=dest_bucket) | 210 b = self._connect_to_bucket(bucket=dest_bucket) |
| 198 | 211 |
| 199 if only_if_modified: | 212 if upload_if == self.UploadIf.IF_NEW: |
| 213 old_key = b.get_key(key_name=dest_path) |
| 214 if old_key: |
| 215 print 'Skipping upload of existing file gs://%s/%s' % ( |
| 216 dest_bucket, dest_path) |
| 217 return |
| 218 elif upload_if == self.UploadIf.IF_MODIFIED: |
| 200 old_key = b.get_key(key_name=dest_path) | 219 old_key = b.get_key(key_name=dest_path) |
| 201 if old_key: | 220 if old_key: |
| 202 local_md5 = '"%s"' % _get_local_md5(path=source_path) | 221 local_md5 = '"%s"' % _get_local_md5(path=source_path) |
| 203 if local_md5 == old_key.etag: | 222 if local_md5 == old_key.etag: |
| 204 print 'Skipping upload of unmodified file %s : %s' % ( | 223 print 'Skipping upload of unmodified file gs://%s/%s : %s' % ( |
| 205 source_path, local_md5) | 224 dest_bucket, dest_path, local_md5) |
| 206 return | 225 return |
| 226 elif upload_if != self.UploadIf.ALWAYS: |
| 227 raise Exception('unknown value of upload_if: %s' % upload_if) |
| 207 | 228 |
| 208 key = Key(b) | 229 key = Key(b) |
| 209 key.name = dest_path | 230 key.name = dest_path |
| 210 try: | 231 try: |
| 211 key.set_contents_from_filename(filename=source_path, | 232 key.set_contents_from_filename(filename=source_path, |
| 212 policy=predefined_acl) | 233 policy=predefined_acl) |
| 213 except BotoServerError, e: | 234 except BotoServerError, e: |
| 214 e.body = (repr(e.body) + | 235 e.body = (repr(e.body) + |
| 215 ' while uploading source_path=%s to bucket=%s, path=%s' % ( | 236 ' while uploading source_path=%s to bucket=%s, path=%s' % ( |
| 216 source_path, dest_bucket, key.name)) | 237 source_path, dest_bucket, key.name)) |
| 217 raise | 238 raise |
| 218 # TODO(epoger): This may be inefficient, because it calls | |
| 219 # _connect_to_bucket() again. Depending on how expensive that | |
| 220 # call is, we may want to optimize this. | |
| 221 for (id_type, id_value, permission) in fine_grained_acl_list or []: | 239 for (id_type, id_value, permission) in fine_grained_acl_list or []: |
| 222 self.set_acl( | 240 self.set_acl( |
| 223 bucket=dest_bucket, path=key.name, | 241 bucket=b, path=key.name, |
| 224 id_type=id_type, id_value=id_value, permission=permission) | 242 id_type=id_type, id_value=id_value, permission=permission) |
| 225 | 243 |
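For the ACL parameters, a hedged example (placeholder bucket, path, and email address; GSUtils and its nested IdType/Permission classes as defined above) of granting one extra reader on top of the bucket's default object ACL:

  gs.upload_file(
      source_path='report.html', dest_bucket='my-bucket',
      dest_path='reports/report.html',
      upload_if=GSUtils.UploadIf.IF_NEW,
      fine_grained_acl_list=[
          (GSUtils.IdType.USER_BY_EMAIL, 'someone@example.com',
           GSUtils.Permission.READ)])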
| 226 def upload_dir_contents(self, source_dir, dest_bucket, dest_dir, | 244 def upload_dir_contents(self, source_dir, dest_bucket, dest_dir, **kwargs): |
| 227 predefined_acl=None, fine_grained_acl_list=None): | |
| 228 """Recursively upload contents of a local directory to Google Storage. | 245 """Recursively upload contents of a local directory to Google Storage. |
| 229 | 246 |
| 230 params: | 247 params: |
| 231 source_dir: full path (local-OS-style) on local disk of directory to copy | 248 source_dir: full path (local-OS-style) on local disk of directory to copy |
| 232 contents of | 249 contents of |
| 233 dest_bucket: GCS bucket to copy the files into | 250 dest_bucket: GS bucket to copy the files into |
| 234 dest_dir: full path (Posix-style) within that bucket; write the files into | 251 dest_dir: full path (Posix-style) within that bucket; write the files into |
| 235 this directory. If None, write into the root directory of the bucket. | 252 this directory. If None, write into the root directory of the bucket. |
| 236 predefined_acl: which predefined ACL to apply to the files on Google | 253 kwargs: any additional keyword arguments "inherited" from upload_file() |
| 237 Storage; must be one of the PredefinedACL values defined above. | |
| 238 If None, inherits dest_bucket's default object ACL. | |
| 239 TODO(epoger): add unittests for this param, although it seems to work | |
| 240 in my manual testing | |
| 241 fine_grained_acl_list: list of (id_type, id_value, permission) tuples | |
| 242 to apply to every file uploaded (on top of the predefined_acl), | |
| 243 or None if predefined_acl is sufficient | |
| 244 | 254 |
| 245 The copy operates as a "merge with overwrite": any files in source_dir will | 255 The copy operates as a merge: any files in source_dir will be "overlaid" on |
| 246 be "overlaid" on top of the existing content in dest_dir. Existing files | 256 top of the existing content in dest_dir. Existing files with the same names |
| 247 with the same names will be overwritten. | 257 may or may not be overwritten, depending on the value of the upload_if kwarg |
| 258 inherited from upload_file(). |
| 248 | 259 |
| 249 TODO(epoger): Upload multiple files simultaneously to reduce latency. | 260 TODO(epoger): Upload multiple files simultaneously to reduce latency. |
| 250 | |
| 251 TODO(epoger): Add a "noclobber" mode that will not upload any files would | |
| 252 overwrite existing files in Google Storage. | |
| 253 | |
| 254 TODO(epoger): Consider adding a do_compress parameter that would compress | |
| 255 the file using gzip before upload, and add a "Content-Encoding:gzip" header | |
| 256 so that HTTP downloads of the file would be unzipped automatically. | |
| 257 See https://developers.google.com/storage/docs/gsutil/addlhelp/ | |
| 258 WorkingWithObjectMetadata#content-encoding | |
| 259 """ | 261 """ |
| 260 b = self._connect_to_bucket(bucket_name=dest_bucket) | 262 b = self._connect_to_bucket(bucket=dest_bucket) |
| 261 for filename in sorted(os.listdir(source_dir)): | 263 for filename in sorted(os.listdir(source_dir)): |
| 262 local_path = os.path.join(source_dir, filename) | 264 local_path = os.path.join(source_dir, filename) |
| 263 if dest_dir: | 265 if dest_dir: |
| 264 remote_path = posixpath.join(dest_dir, filename) | 266 remote_path = posixpath.join(dest_dir, filename) |
| 265 else: | 267 else: |
| 266 remote_path = filename | 268 remote_path = filename |
| 267 | 269 |
| 268 if os.path.isdir(local_path): | 270 if os.path.isdir(local_path): |
| 269 self.upload_dir_contents( # recurse | 271 self.upload_dir_contents( # recurse |
| 270 source_dir=local_path, dest_bucket=dest_bucket, | 272 source_dir=local_path, dest_bucket=b, dest_dir=remote_path, |
| 271 dest_dir=remote_path, | 273 **kwargs) |
| 272 predefined_acl=predefined_acl, | |
| 273 fine_grained_acl_list=fine_grained_acl_list) | |
| 274 else: | 274 else: |
| 275 key = Key(b) | 275 self.upload_file( |
| 276 key.name = remote_path | 276 source_path=local_path, dest_bucket=b, dest_path=remote_path, |
| 277 try: | 277 **kwargs) |
| 278 key.set_contents_from_filename( | |
| 279 filename=local_path, policy=predefined_acl) | |
| 280 except BotoServerError, e: | |
| 281 e.body = (repr(e.body) + | |
| 282 ' while uploading local_path=%s to bucket=%s, path=%s' % ( | |
| 283 local_path, dest_bucket, remote_path)) | |
| 284 raise | |
| 285 # TODO(epoger): This may be inefficient, because it calls | |
| 286 # _connect_to_bucket() for every file. Depending on how expensive that | |
| 287 # call is, we may want to optimize this. | |
| 288 for (id_type, id_value, permission) in fine_grained_acl_list or []: | |
| 289 self.set_acl( | |
| 290 bucket=dest_bucket, path=remote_path, | |
| 291 id_type=id_type, id_value=id_value, permission=permission) | |
| 292 | 278 |
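Because upload_dir_contents() now forwards its extra keyword arguments to upload_file(), one call can apply the same upload policy and ACL grants to an entire tree; a sketch with placeholder names:

  gs.upload_dir_contents(
      source_dir='/tmp/build-output', dest_bucket='my-bucket',
      dest_dir='builds/123',
      upload_if=GSUtils.UploadIf.IF_MODIFIED,
      fine_grained_acl_list=[
          (GSUtils.IdType.GROUP_BY_DOMAIN, 'example.com',
           GSUtils.Permission.READ)])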
| 293 def download_file(self, source_bucket, source_path, dest_path, | 279 def download_file(self, source_bucket, source_path, dest_path, |
| 294 create_subdirs_if_needed=False): | 280 create_subdirs_if_needed=False): |
| 295 """Downloads a single file from Google Cloud Storage to local disk. | 281 """Downloads a single file from Google Cloud Storage to local disk. |
| 296 | 282 |
| 297 Args: | 283 Args: |
| 298 source_bucket: GCS bucket to download the file from | 284 source_bucket: GS bucket to download the file from |
| 299 source_path: full path (Posix-style) within that bucket | 285 source_path: full path (Posix-style) within that bucket |
| 300 dest_path: full path (local-OS-style) on local disk to copy the file to | 286 dest_path: full path (local-OS-style) on local disk to copy the file to |
| 301 create_subdirs_if_needed: boolean; whether to create subdirectories as | 287 create_subdirs_if_needed: boolean; whether to create subdirectories as |
| 302 needed to create dest_path | 288 needed to create dest_path |
| 303 """ | 289 """ |
| 304 b = self._connect_to_bucket(bucket_name=source_bucket) | 290 b = self._connect_to_bucket(bucket=source_bucket) |
| 305 key = Key(b) | 291 key = Key(b) |
| 306 key.name = source_path | 292 key.name = source_path |
| 307 if create_subdirs_if_needed: | 293 if create_subdirs_if_needed: |
| 308 _makedirs_if_needed(os.path.dirname(dest_path)) | 294 _makedirs_if_needed(os.path.dirname(dest_path)) |
| 309 with open(dest_path, 'w') as f: | 295 with open(dest_path, 'w') as f: |
| 310 try: | 296 try: |
| 311 key.get_contents_to_file(fp=f) | 297 key.get_contents_to_file(fp=f) |
| 312 except BotoServerError, e: | 298 except BotoServerError, e: |
| 313 e.body = (repr(e.body) + | 299 e.body = (repr(e.body) + |
| 314 ' while downloading bucket=%s, path=%s to local_path=%s' % ( | 300 ' while downloading bucket=%s, path=%s to local_path=%s' % ( |
| 315 source_bucket, source_path, dest_path)) | 301 source_bucket, source_path, dest_path)) |
| 316 raise | 302 raise |
| 317 | 303 |
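A brief download sketch (placeholder names again), using create_subdirs_if_needed so that missing local directories are created first:

  gs.download_file(
      source_bucket='my-bucket', source_path='reports/report.html',
      dest_path='/tmp/downloads/reports/report.html',
      create_subdirs_if_needed=True)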
| 318 def download_dir_contents(self, source_bucket, source_dir, dest_dir): | 304 def download_dir_contents(self, source_bucket, source_dir, dest_dir): |
| 319 """Recursively download contents of a Google Storage directory to local disk | 305 """Recursively download contents of a Google Storage directory to local disk |
| 320 | 306 |
| 321 params: | 307 params: |
| 322 source_bucket: GCS bucket to copy the files from | 308 source_bucket: GS bucket to copy the files from |
| 323 source_dir: full path (Posix-style) within that bucket; read the files | 309 source_dir: full path (Posix-style) within that bucket; read the files |
| 324 from this directory | 310 from this directory |
| 325 dest_dir: full path (local-OS-style) on local disk of directory to copy | 311 dest_dir: full path (local-OS-style) on local disk of directory to copy |
| 326 the files into | 312 the files into |
| 327 | 313 |
| 328 The copy operates as a "merge with overwrite": any files in source_dir will | 314 The copy operates as a "merge with overwrite": any files in source_dir will |
| 329 be "overlaid" on top of the existing content in dest_dir. Existing files | 315 be "overlaid" on top of the existing content in dest_dir. Existing files |
| 330 with the same names will be overwritten. | 316 with the same names will be overwritten. |
| 331 | 317 |
| 332 TODO(epoger): Download multiple files simultaneously to reduce latency. | 318 TODO(epoger): Download multiple files simultaneously to reduce latency. |
| 333 """ | 319 """ |
| 334 _makedirs_if_needed(dest_dir) | 320 _makedirs_if_needed(dest_dir) |
| 335 b = self._connect_to_bucket(bucket_name=source_bucket) | 321 b = self._connect_to_bucket(bucket=source_bucket) |
| 336 (dirs, files) = self.list_bucket_contents( | 322 (dirs, files) = self.list_bucket_contents( |
| 337 bucket=source_bucket, subdir=source_dir) | 323 bucket=source_bucket, subdir=source_dir) |
| 338 | 324 |
| 339 for filename in files: | 325 for filename in files: |
| 340 key = Key(b) | 326 key = Key(b) |
| 341 key.name = posixpath.join(source_dir, filename) | 327 key.name = posixpath.join(source_dir, filename) |
| 342 dest_path = os.path.join(dest_dir, filename) | 328 dest_path = os.path.join(dest_dir, filename) |
| 343 with open(dest_path, 'w') as f: | 329 with open(dest_path, 'w') as f: |
| 344 try: | 330 try: |
| 345 key.get_contents_to_file(fp=f) | 331 key.get_contents_to_file(fp=f) |
| (...skipping 25 matching lines...) |
| 371 path: full path (Posix-style) to the file within that bucket | 357 path: full path (Posix-style) to the file within that bucket |
| 372 id_type: must be one of the IdType values defined above | 358 id_type: must be one of the IdType values defined above |
| 373 id_value: get permissions for users whose id_type field contains this | 359 id_value: get permissions for users whose id_type field contains this |
| 374 value | 360 value |
| 375 | 361 |
| 376 Returns: the Permission value which has been set for users matching | 362 Returns: the Permission value which has been set for users matching |
| 377 this id_type/id_value, on this file; or Permission.EMPTY if no such | 363 this id_type/id_value, on this file; or Permission.EMPTY if no such |
| 378 permissions have been set. | 364 permissions have been set. |
| 379 """ | 365 """ |
| 380 field = self._field_by_id_type[id_type] | 366 field = self._field_by_id_type[id_type] |
| 381 b = self._connect_to_bucket(bucket_name=bucket) | 367 b = self._connect_to_bucket(bucket=bucket) |
| 382 acls = b.get_acl(key_name=path) | 368 acls = b.get_acl(key_name=path) |
| 383 matching_entries = [entry for entry in acls.entries.entry_list | 369 matching_entries = [entry for entry in acls.entries.entry_list |
| 384 if (entry.scope.type == id_type) and | 370 if (entry.scope.type == id_type) and |
| 385 (getattr(entry.scope, field) == id_value)] | 371 (getattr(entry.scope, field) == id_value)] |
| 386 if matching_entries: | 372 if matching_entries: |
| 387 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) | 373 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) |
| 388 return matching_entries[0].permission | 374 return matching_entries[0].permission |
| 389 else: | 375 else: |
| 390 return self.Permission.EMPTY | 376 return self.Permission.EMPTY |
| 391 | 377 |
| (...skipping 28 matching lines...) |
| 420 bucket = 'gs://bucket-name' | 406 bucket = 'gs://bucket-name' |
| 421 path = 'path/to/file' | 407 path = 'path/to/file' |
| 422 id_type = IdType.USER_BY_EMAIL | 408 id_type = IdType.USER_BY_EMAIL |
| 423 id_value = 'epoger@google.com' | 409 id_value = 'epoger@google.com' |
| 424 set_acl(bucket, path, id_type, id_value, Permission.READ) | 410 set_acl(bucket, path, id_type, id_value, Permission.READ) |
| 425 assert Permission.READ == get_acl(bucket, path, id_type, id_value) | 411 assert Permission.READ == get_acl(bucket, path, id_type, id_value) |
| 426 set_acl(bucket, path, id_type, id_value, Permission.WRITE) | 412 set_acl(bucket, path, id_type, id_value, Permission.WRITE) |
| 427 assert Permission.WRITE == get_acl(bucket, path, id_type, id_value) | 413 assert Permission.WRITE == get_acl(bucket, path, id_type, id_value) |
| 428 """ | 414 """ |
| 429 field = self._field_by_id_type[id_type] | 415 field = self._field_by_id_type[id_type] |
| 430 b = self._connect_to_bucket(bucket_name=bucket) | 416 b = self._connect_to_bucket(bucket=bucket) |
| 431 acls = b.get_acl(key_name=path) | 417 acls = b.get_acl(key_name=path) |
| 432 | 418 |
| 433 # Remove any existing entries that refer to the same id_type/id_value, | 419 # Remove any existing entries that refer to the same id_type/id_value, |
| 434 # because the API will fail if we try to set more than one. | 420 # because the API will fail if we try to set more than one. |
| 435 matching_entries = [entry for entry in acls.entries.entry_list | 421 matching_entries = [entry for entry in acls.entries.entry_list |
| 436 if (entry.scope.type == id_type) and | 422 if (entry.scope.type == id_type) and |
| 437 (getattr(entry.scope, field) == id_value)] | 423 (getattr(entry.scope, field) == id_value)] |
| 438 if matching_entries: | 424 if matching_entries: |
| 439 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) | 425 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) |
| 440 acls.entries.entry_list.remove(matching_entries[0]) | 426 acls.entries.entry_list.remove(matching_entries[0]) |
| (...skipping 17 matching lines...) |
| 458 Args: | 444 Args: |
| 459 bucket: name of the Google Storage bucket | 445 bucket: name of the Google Storage bucket |
| 460 subdir: directory within the bucket to list, or None for root directory | 446 subdir: directory within the bucket to list, or None for root directory |
| 461 """ | 447 """ |
| 462 # The GS command relies on the prefix (if any) ending with a slash. | 448 # The GS command relies on the prefix (if any) ending with a slash. |
| 463 prefix = subdir or '' | 449 prefix = subdir or '' |
| 464 if prefix and not prefix.endswith('/'): | 450 if prefix and not prefix.endswith('/'): |
| 465 prefix += '/' | 451 prefix += '/' |
| 466 prefix_length = len(prefix) if prefix else 0 | 452 prefix_length = len(prefix) if prefix else 0 |
| 467 | 453 |
| 468 b = self._connect_to_bucket(bucket_name=bucket) | 454 b = self._connect_to_bucket(bucket=bucket) |
| 469 items = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') | 455 items = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') |
| 470 dirs = [] | 456 dirs = [] |
| 471 files = [] | 457 files = [] |
| 472 for item in items: | 458 for item in items: |
| 473 t = type(item) | 459 t = type(item) |
| 474 if t is Key: | 460 if t is Key: |
| 475 files.append(item.name[prefix_length:]) | 461 files.append(item.name[prefix_length:]) |
| 476 elif t is Prefix: | 462 elif t is Prefix: |
| 477 dirs.append(item.name[prefix_length:-1]) | 463 dirs.append(item.name[prefix_length:-1]) |
| 478 return (dirs, files) | 464 return (dirs, files) |
| 479 | 465 |
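A quick sketch of listing a single directory level with list_bucket_contents() (placeholder names); the returned names are relative to subdir, with directories and files separated:

  dirs, files = gs.list_bucket_contents(bucket='my-bucket', subdir='builds')
  for d in dirs:
      print 'subdir: %s' % d
  for f in files:
      print 'file: %s' % f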
| 480 def _connect_to_bucket(self, bucket_name): | 466 def _connect_to_bucket(self, bucket): |
| 481 """Returns a Bucket object we can use to access a particular bucket in GS. | 467 """Returns a Bucket object we can use to access a particular bucket in GS. |
| 482 | 468 |
| 483 Params: | 469 Params: |
| 484 bucket_name: name of the bucket (e.g., 'chromium-skia-gm') | 470 bucket: name of the bucket (e.g., 'chromium-skia-gm'), or a Bucket |
| 471 object (in which case this param is just returned as-is) |
| 485 """ | 472 """ |
| 473 if type(bucket) is Bucket: |
| 474 return bucket |
| 486 try: | 475 try: |
| 487 return self._create_connection().get_bucket(bucket_name=bucket_name) | 476 return self._create_connection().get_bucket(bucket_name=bucket) |
| 488 except BotoServerError, e: | 477 except BotoServerError, e: |
| 489 e.body = repr(e.body) + ' while connecting to bucket=%s' % bucket_name | 478 e.body = repr(e.body) + ' while connecting to bucket=%s' % bucket |
| 490 raise | 479 raise |
| 491 | 480 |
| 492 def _create_connection(self): | 481 def _create_connection(self): |
| 493 """Returns a GSConnection object we can use to access Google Storage.""" | 482 """Returns a GSConnection object we can use to access Google Storage.""" |
| 494 if self._gs_access_key_id: | 483 if self._gs_access_key_id: |
| 495 return GSConnection( | 484 return GSConnection( |
| 496 gs_access_key_id=self._gs_access_key_id, | 485 gs_access_key_id=self._gs_access_key_id, |
| 497 gs_secret_access_key=self._gs_secret_access_key) | 486 gs_secret_access_key=self._gs_secret_access_key) |
| 498 else: | 487 else: |
| 499 return AnonymousGSConnection() | 488 return AnonymousGSConnection() |
| (...skipping 39 matching lines...) |
| 539 | 528 |
| 540 def _get_local_md5(path): | 529 def _get_local_md5(path): |
| 541 """Returns the MD5 hash of a file on local disk.""" | 530 """Returns the MD5 hash of a file on local disk.""" |
| 542 hasher = hashlib.md5() | 531 hasher = hashlib.md5() |
| 543 with open(path, 'rb') as f: | 532 with open(path, 'rb') as f: |
| 544 while True: | 533 while True: |
| 545 data = f.read(64*1024) | 534 data = f.read(64*1024) |
| 546 if not data: | 535 if not data: |
| 547 return hasher.hexdigest() | 536 return hasher.hexdigest() |
| 548 hasher.update(data) | 537 hasher.update(data) |