Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 | 2 |
| 3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
| 4 """ | 4 """ |
| 5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
| 6 | 6 |
| 7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
| 8 found in the LICENSE file. | 8 found in the LICENSE file. |
| 9 | 9 |
| 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
| (...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 195 dest_dir: full path (Posix-style) within that bucket; write the files into | 195 dest_dir: full path (Posix-style) within that bucket; write the files into |
| 196 this directory | 196 this directory |
| 197 predefined_acl: which predefined ACL to apply to the files on Google | 197 predefined_acl: which predefined ACL to apply to the files on Google |
| 198 Storage; must be one of the PREDEFINED_ACL_* constants defined above. | 198 Storage; must be one of the PREDEFINED_ACL_* constants defined above. |
| 199 If None, inherits dest_bucket's default object ACL. | 199 If None, inherits dest_bucket's default object ACL. |
| 200 TODO(epoger): add unittests for this param, although it seems to work | 200 TODO(epoger): add unittests for this param, although it seems to work |
| 201 in my manual testing | 201 in my manual testing |
| 202 fine_grained_acl_list: list of (id_type, id_value, permission) tuples | 202 fine_grained_acl_list: list of (id_type, id_value, permission) tuples |
| 203 to apply to every file uploaded (on top of the predefined_acl), | 203 to apply to every file uploaded (on top of the predefined_acl), |
| 204 or None if predefined_acl is sufficient | 204 or None if predefined_acl is sufficient |
| 205 TODO(epoger): add unittests for this param, although it seems to work | |
| 206 in my manual testing | |
| 207 | 205 |
| 208 The copy operates as a "merge with overwrite": any files in source_dir will | 206 The copy operates as a "merge with overwrite": any files in source_dir will |
| 209 be "overlaid" on top of the existing content in dest_dir. Existing files | 207 be "overlaid" on top of the existing content in dest_dir. Existing files |
| 210 with the same names will be overwritten. | 208 with the same names will be overwritten. |
| 211 | 209 |
| 212 TODO(epoger): Upload multiple files simultaneously to reduce latency. | 210 TODO(epoger): Upload multiple files simultaneously to reduce latency. |
| 213 | 211 |
| 214 TODO(epoger): Add a "noclobber" mode that will not upload any files that would | 212 TODO(epoger): Add a "noclobber" mode that will not upload any files that would |
| 215 overwrite existing files in Google Storage. | 213 overwrite existing files in Google Storage. |
| 216 | 214 |
| 217 TODO(epoger): Consider adding a do_compress parameter that would compress | 215 TODO(epoger): Consider adding a do_compress parameter that would compress |
| 218 the file using gzip before upload, and add a "Content-Encoding:gzip" header | 216 the file using gzip before upload, and add a "Content-Encoding:gzip" header |
| 219 so that HTTP downloads of the file would be unzipped automatically. | 217 so that HTTP downloads of the file would be unzipped automatically. |
| 220 See https://developers.google.com/storage/docs/gsutil/addlhelp/ | 218 See https://developers.google.com/storage/docs/gsutil/addlhelp/ |
| 221 WorkingWithObjectMetadata#content-encoding | 219 WorkingWithObjectMetadata#content-encoding |
| 222 """ | 220 """ |
| 223 b = self._connect_to_bucket(bucket_name=dest_bucket) | 221 b = self._connect_to_bucket(bucket_name=dest_bucket) |
| 224 for filename in sorted(os.listdir(source_dir)): | 222 for filename in sorted(os.listdir(source_dir)): |
| 225 local_path = os.path.join(source_dir, filename) | 223 local_path = os.path.join(source_dir, filename) |
| 226 if os.path.isdir(local_path): | 224 if os.path.isdir(local_path): |
| 227 self.upload_dir_contents( # recurse | 225 self.upload_dir_contents( # recurse |
| 228 source_dir=local_path, dest_bucket=dest_bucket, | 226 source_dir=local_path, dest_bucket=dest_bucket, |
| 229 dest_dir=posixpath.join(dest_dir, filename), | 227 dest_dir=posixpath.join(dest_dir, filename), |
| 230 predefined_acl=predefined_acl) | 228 predefined_acl=predefined_acl, |
| 229 fine_grained_acl_list=fine_grained_acl_list) | |
|
epoger
2014/07/18 20:41:26
whoops
| |
| 231 else: | 230 else: |
| 232 item = Key(b) | 231 item = Key(b) |
| 233 item.key = posixpath.join(dest_dir, filename) | 232 dest_path = posixpath.join(dest_dir, filename) |
| 233 item.key = dest_path | |
| 234 try: | 234 try: |
| 235 item.set_contents_from_filename( | 235 item.set_contents_from_filename( |
| 236 filename=local_path, policy=predefined_acl) | 236 filename=local_path, policy=predefined_acl) |
| 237 except BotoServerError, e: | 237 except BotoServerError, e: |
| 238 e.body = (repr(e.body) + | 238 e.body = (repr(e.body) + |
| 239 ' while uploading local_path=%s to bucket=%s, path=%s' % ( | 239 ' while uploading local_path=%s to bucket=%s, path=%s' % ( |
| 240 local_path, dest_bucket, item.key)) | 240 local_path, dest_bucket, dest_path)) |
| 241 raise | 241 raise |
| 242 # TODO(epoger): This may be inefficient, because it calls | 242 # TODO(epoger): This may be inefficient, because it calls |
| 243 # _connect_to_bucket() for every file. Depending on how expensive that | 243 # _connect_to_bucket() for every file. Depending on how expensive that |
| 244 # call is, we may want to optimize this. | 244 # call is, we may want to optimize this. |
| 245 for (id_type, id_value, permission) in fine_grained_acl_list or []: | 245 for (id_type, id_value, permission) in fine_grained_acl_list or []: |
| 246 self.set_acl( | 246 self.set_acl( |
| 247 bucket=dest_bucket, path=item.key, | 247 bucket=dest_bucket, path=dest_path, |
| 248 id_type=id_type, id_value=id_value, permission=permission) | 248 id_type=id_type, id_value=id_value, permission=permission) |
| 249 | 249 |
| 250 def download_file(self, source_bucket, source_path, dest_path, | 250 def download_file(self, source_bucket, source_path, dest_path, |
| 251 create_subdirs_if_needed=False): | 251 create_subdirs_if_needed=False): |
| 252 """Downloads a single file from Google Cloud Storage to local disk. | 252 """Downloads a single file from Google Cloud Storage to local disk. |
| 253 | 253 |
| 254 Args: | 254 Args: |
| 255 source_bucket: GCS bucket to download the file from | 255 source_bucket: GCS bucket to download the file from |
| 256 source_path: full path (Posix-style) within that bucket | 256 source_path: full path (Posix-style) within that bucket |
| 257 dest_path: full path (local-OS-style) on local disk to copy the file to | 257 dest_path: full path (local-OS-style) on local disk to copy the file to |
| (...skipping 362 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 620 Do you have a ~/.boto file that provides the credentials needed to read | 620 Do you have a ~/.boto file that provides the credentials needed to read |
| 621 and write gs://chromium-skia-gm ? | 621 and write gs://chromium-skia-gm ? |
| 622 """ | 622 """ |
| 623 raise | 623 raise |
| 624 | 624 |
| 625 bucket = 'chromium-skia-gm' | 625 bucket = 'chromium-skia-gm' |
| 626 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) | 626 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) |
| 627 subdir = 'subdir' | 627 subdir = 'subdir' |
| 628 filenames = ['file1', 'file2'] | 628 filenames = ['file1', 'file2'] |
| 629 | 629 |
| 630 # Create directory tree on local disk, and upload it. | 630 # Create directory tree on local disk and upload it. |
| 631 id_type = ID_TYPE_GROUP_BY_DOMAIN | |
| 632 id_value = 'chromium.org' | |
| 633 set_permission = PERMISSION_READ | |
| 631 local_src_dir = tempfile.mkdtemp() | 634 local_src_dir = tempfile.mkdtemp() |
| 632 os.mkdir(os.path.join(local_src_dir, subdir)) | 635 os.mkdir(os.path.join(local_src_dir, subdir)) |
| 633 try: | 636 try: |
| 634 for filename in filenames: | 637 for filename in filenames: |
| 635 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: | 638 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: |
| 636 f.write('contents of %s\n' % filename) | 639 f.write('contents of %s\n' % filename) |
| 637 gs.upload_dir_contents(source_dir=local_src_dir, dest_bucket=bucket, | 640 gs.upload_dir_contents( |
|
epoger
2014/07/18 20:41:26
This was indented wrong before (and calling upload
| |
| 638 dest_dir=remote_dir) | 641 source_dir=local_src_dir, dest_bucket=bucket, dest_dir=remote_dir, |
| 642 predefined_acl=PREDEFINED_ACL_PRIVATE, | |
| 643 fine_grained_acl_list=[(id_type, id_value, set_permission)]) | |
| 639 finally: | 644 finally: |
| 640 shutil.rmtree(local_src_dir) | 645 shutil.rmtree(local_src_dir) |
| 641 | 646 |
| 642 # Validate the list of the files we uploaded to Google Storage. | 647 # Validate the list of the files we uploaded to Google Storage. |
| 643 (dirs, files) = gs.list_bucket_contents( | 648 (dirs, files) = gs.list_bucket_contents( |
| 644 bucket=bucket, subdir=remote_dir) | 649 bucket=bucket, subdir=remote_dir) |
| 645 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir) | 650 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir) |
| 646 assert files == [], '%s == []' % files | 651 assert files == [], '%s == []' % files |
| 647 (dirs, files) = gs.list_bucket_contents( | 652 (dirs, files) = gs.list_bucket_contents( |
| 648 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) | 653 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |
| 649 assert dirs == [], '%s == []' % dirs | 654 assert dirs == [], '%s == []' % dirs |
| 650 assert files == filenames, '%s == %s' % (files, filenames) | 655 assert files == filenames, '%s == %s' % (files, filenames) |
| 651 | 656 |
| 657 # Check the fine-grained ACLs we set in Google Storage. | |
| 658 for filename in filenames: | |
| 659 got_permission = gs.get_acl( | |
| 660 bucket=bucket, path=posixpath.join(remote_dir, subdir, filename), | |
| 661 id_type=id_type, id_value=id_value) | |
| 662 assert got_permission == set_permission, '%s == %s' % ( | |
| 663 got_permission, set_permission) | |
| 664 | |
| 652 # Download the directory tree we just uploaded, make sure its contents | 665 # Download the directory tree we just uploaded, make sure its contents |
| 653 # are what we expect, and then delete the tree in Google Storage. | 666 # are what we expect, and then delete the tree in Google Storage. |
| 654 local_dest_dir = tempfile.mkdtemp() | 667 local_dest_dir = tempfile.mkdtemp() |
| 655 try: | 668 try: |
| 656 gs.download_dir_contents(source_bucket=bucket, source_dir=remote_dir, | 669 gs.download_dir_contents(source_bucket=bucket, source_dir=remote_dir, |
| 657 dest_dir=local_dest_dir) | 670 dest_dir=local_dest_dir) |
| 658 for filename in filenames: | 671 for filename in filenames: |
| 659 with open(os.path.join(local_dest_dir, subdir, filename)) as f: | 672 with open(os.path.join(local_dest_dir, subdir, filename)) as f: |
| 660 file_contents = f.read() | 673 file_contents = f.read() |
| 661 assert file_contents == 'contents of %s\n' % filename, ( | 674 assert file_contents == 'contents of %s\n' % filename, ( |
| 662 '%s == "contents of %s\n"' % (file_contents, filename)) | 675 '%s == "contents of %s\n"' % (file_contents, filename)) |
| 663 finally: | 676 finally: |
| 664 shutil.rmtree(local_dest_dir) | 677 shutil.rmtree(local_dest_dir) |
| 665 for filename in filenames: | 678 for filename in filenames: |
| 666 gs.delete_file(bucket=bucket, | 679 gs.delete_file(bucket=bucket, |
| 667 path=posixpath.join(remote_dir, subdir, filename)) | 680 path=posixpath.join(remote_dir, subdir, filename)) |
| 668 | 681 |
| 669 | 682 |
| 670 # TODO(epoger): How should we exercise these self-tests? | 683 # TODO(epoger): How should we exercise these self-tests? |
| 671 # See http://skbug.com/2751 | 684 # See http://skbug.com/2751 |
| 672 if __name__ == '__main__': | 685 if __name__ == '__main__': |
| 673 _test_public_read() | 686 _test_public_read() |
| 674 _test_authenticated_round_trip() | 687 _test_authenticated_round_trip() |
| 675 _test_dir_upload_and_download() | 688 _test_dir_upload_and_download() |
| 676 # TODO(epoger): Add _test_unauthenticated_access() to make sure we raise | 689 # TODO(epoger): Add _test_unauthenticated_access() to make sure we raise |
| 677 # an exception when we try to access without needed credentials. | 690 # an exception when we try to access without needed credentials. |
| OLD | NEW |