OLD | NEW |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
4 """ | 4 """ |
5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
6 | 6 |
7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
8 found in the LICENSE file. | 8 found in the LICENSE file. |
9 | 9 |
10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
(...skipping 184 matching lines...) | |
195 dest_dir: full path (Posix-style) within that bucket; write the files into | 195 dest_dir: full path (Posix-style) within that bucket; write the files into |
196 this directory | 196 this directory |
197 predefined_acl: which predefined ACL to apply to the files on Google | 197 predefined_acl: which predefined ACL to apply to the files on Google |
198 Storage; must be one of the PREDEFINED_ACL_* constants defined above. | 198 Storage; must be one of the PREDEFINED_ACL_* constants defined above. |
199 If None, inherits dest_bucket's default object ACL. | 199 If None, inherits dest_bucket's default object ACL. |
200 TODO(epoger): add unittests for this param, although it seems to work | 200 TODO(epoger): add unittests for this param, although it seems to work |
201 in my manual testing | 201 in my manual testing |
202 fine_grained_acl_list: list of (id_type, id_value, permission) tuples | 202 fine_grained_acl_list: list of (id_type, id_value, permission) tuples |
203 to apply to every file uploaded (on top of the predefined_acl), | 203 to apply to every file uploaded (on top of the predefined_acl), |
204 or None if predefined_acl is sufficient | 204 or None if predefined_acl is sufficient |
205 TODO(epoger): add unittests for this param, although it seems to work | |
206 in my manual testing | |
207 | 205 |
208 The copy operates as a "merge with overwrite": any files in source_dir will | 206 The copy operates as a "merge with overwrite": any files in source_dir will |
209 be "overlaid" on top of the existing content in dest_dir. Existing files | 207 be "overlaid" on top of the existing content in dest_dir. Existing files |
210 with the same names will be overwritten. | 208 with the same names will be overwritten. |
211 | 209 |
212 TODO(epoger): Upload multiple files simultaneously to reduce latency. | 210 TODO(epoger): Upload multiple files simultaneously to reduce latency. |
213 | 211 |
214 TODO(epoger): Add a "noclobber" mode that will not upload any files that would | 212 TODO(epoger): Add a "noclobber" mode that will not upload any files that would |
215 overwrite existing files in Google Storage. | 213 overwrite existing files in Google Storage. |
216 | 214 |
217 TODO(epoger): Consider adding a do_compress parameter that would compress | 215 TODO(epoger): Consider adding a do_compress parameter that would compress |
218 the file using gzip before upload, and add a "Content-Encoding:gzip" header | 216 the file using gzip before upload, and add a "Content-Encoding:gzip" header |
219 so that HTTP downloads of the file would be unzipped automatically. | 217 so that HTTP downloads of the file would be unzipped automatically. |
220 See https://developers.google.com/storage/docs/gsutil/addlhelp/ | 218 See https://developers.google.com/storage/docs/gsutil/addlhelp/ |
221 WorkingWithObjectMetadata#content-encoding | 219 WorkingWithObjectMetadata#content-encoding |
222 """ | 220 """ |
223 b = self._connect_to_bucket(bucket_name=dest_bucket) | 221 b = self._connect_to_bucket(bucket_name=dest_bucket) |
224 for filename in sorted(os.listdir(source_dir)): | 222 for filename in sorted(os.listdir(source_dir)): |
225 local_path = os.path.join(source_dir, filename) | 223 local_path = os.path.join(source_dir, filename) |
226 if os.path.isdir(local_path): | 224 if os.path.isdir(local_path): |
227 self.upload_dir_contents( # recurse | 225 self.upload_dir_contents( # recurse |
228 source_dir=local_path, dest_bucket=dest_bucket, | 226 source_dir=local_path, dest_bucket=dest_bucket, |
229 dest_dir=posixpath.join(dest_dir, filename), | 227 dest_dir=posixpath.join(dest_dir, filename), |
230 predefined_acl=predefined_acl) | 228 predefined_acl=predefined_acl, |
229 fine_grained_acl_list=fine_grained_acl_list) | |
epoger
2014/07/18 20:41:26
whoops
| |
231 else: | 230 else: |
232 item = Key(b) | 231 item = Key(b) |
233 item.key = posixpath.join(dest_dir, filename) | 232 dest_path = posixpath.join(dest_dir, filename) |
233 item.key = dest_path | |
234 try: | 234 try: |
235 item.set_contents_from_filename( | 235 item.set_contents_from_filename( |
236 filename=local_path, policy=predefined_acl) | 236 filename=local_path, policy=predefined_acl) |
237 except BotoServerError, e: | 237 except BotoServerError, e: |
238 e.body = (repr(e.body) + | 238 e.body = (repr(e.body) + |
239 ' while uploading local_path=%s to bucket=%s, path=%s' % ( | 239 ' while uploading local_path=%s to bucket=%s, path=%s' % ( |
240 local_path, dest_bucket, item.key)) | 240 local_path, dest_bucket, dest_path)) |
241 raise | 241 raise |
242 # TODO(epoger): This may be inefficient, because it calls | 242 # TODO(epoger): This may be inefficient, because it calls |
243 # _connect_to_bucket() for every file. Depending on how expensive that | 243 # _connect_to_bucket() for every file. Depending on how expensive that |
244 # call is, we may want to optimize this. | 244 # call is, we may want to optimize this. |
245 for (id_type, id_value, permission) in fine_grained_acl_list or []: | 245 for (id_type, id_value, permission) in fine_grained_acl_list or []: |
246 self.set_acl( | 246 self.set_acl( |
247 bucket=dest_bucket, path=item.key, | 247 bucket=dest_bucket, path=dest_path, |
248 id_type=id_type, id_value=id_value, permission=permission) | 248 id_type=id_type, id_value=id_value, permission=permission) |
249 | 249 |
250 def download_file(self, source_bucket, source_path, dest_path, | 250 def download_file(self, source_bucket, source_path, dest_path, |
251 create_subdirs_if_needed=False): | 251 create_subdirs_if_needed=False): |
252 """Downloads a single file from Google Cloud Storage to local disk. | 252 """Downloads a single file from Google Cloud Storage to local disk. |
253 | 253 |
254 Args: | 254 Args: |
255 source_bucket: GCS bucket to download the file from | 255 source_bucket: GCS bucket to download the file from |
256 source_path: full path (Posix-style) within that bucket | 256 source_path: full path (Posix-style) within that bucket |
257 dest_path: full path (local-OS-style) on local disk to copy the file to | 257 dest_path: full path (local-OS-style) on local disk to copy the file to |
(...skipping 362 matching lines...) | |
620 Do you have a ~/.boto file that provides the credentials needed to read | 620 Do you have a ~/.boto file that provides the credentials needed to read |
621 and write gs://chromium-skia-gm ? | 621 and write gs://chromium-skia-gm ? |
622 """ | 622 """ |
623 raise | 623 raise |
624 | 624 |
625 bucket = 'chromium-skia-gm' | 625 bucket = 'chromium-skia-gm' |
626 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) | 626 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) |
627 subdir = 'subdir' | 627 subdir = 'subdir' |
628 filenames = ['file1', 'file2'] | 628 filenames = ['file1', 'file2'] |
629 | 629 |
630 # Create directory tree on local disk, and upload it. | 630 # Create directory tree on local disk and upload it. |
631 id_type = ID_TYPE_GROUP_BY_DOMAIN | |
632 id_value = 'chromium.org' | |
633 set_permission = PERMISSION_READ | |
631 local_src_dir = tempfile.mkdtemp() | 634 local_src_dir = tempfile.mkdtemp() |
632 os.mkdir(os.path.join(local_src_dir, subdir)) | 635 os.mkdir(os.path.join(local_src_dir, subdir)) |
633 try: | 636 try: |
634 for filename in filenames: | 637 for filename in filenames: |
635 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: | 638 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: |
636 f.write('contents of %s\n' % filename) | 639 f.write('contents of %s\n' % filename) |
637 gs.upload_dir_contents(source_dir=local_src_dir, dest_bucket=bucket, | 640 gs.upload_dir_contents( |
epoger
2014/07/18 20:41:26
This was indented wrong before (and calling upload
| |
638 dest_dir=remote_dir) | 641 source_dir=local_src_dir, dest_bucket=bucket, dest_dir=remote_dir, |
642 predefined_acl=PREDEFINED_ACL_PRIVATE, | |
643 fine_grained_acl_list=[(id_type, id_value, set_permission)]) | |
639 finally: | 644 finally: |
640 shutil.rmtree(local_src_dir) | 645 shutil.rmtree(local_src_dir) |
641 | 646 |
642 # Validate the list of the files we uploaded to Google Storage. | 647 # Validate the list of the files we uploaded to Google Storage. |
643 (dirs, files) = gs.list_bucket_contents( | 648 (dirs, files) = gs.list_bucket_contents( |
644 bucket=bucket, subdir=remote_dir) | 649 bucket=bucket, subdir=remote_dir) |
645 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir) | 650 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir) |
646 assert files == [], '%s == []' % files | 651 assert files == [], '%s == []' % files |
647 (dirs, files) = gs.list_bucket_contents( | 652 (dirs, files) = gs.list_bucket_contents( |
648 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) | 653 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |
649 assert dirs == [], '%s == []' % dirs | 654 assert dirs == [], '%s == []' % dirs |
650 assert files == filenames, '%s == %s' % (files, filenames) | 655 assert files == filenames, '%s == %s' % (files, filenames) |
651 | 656 |
657 # Check the fine-grained ACLs we set in Google Storage. | |
658 for filename in filenames: | |
659 got_permission = gs.get_acl( | |
660 bucket=bucket, path=posixpath.join(remote_dir, subdir, filename), | |
661 id_type=id_type, id_value=id_value) | |
662 assert got_permission == set_permission, '%s == %s' % ( | |
663 got_permission, set_permission) | |
664 | |
652 # Download the directory tree we just uploaded, make sure its contents | 665 # Download the directory tree we just uploaded, make sure its contents |
653 # are what we expect, and then delete the tree in Google Storage. | 666 # are what we expect, and then delete the tree in Google Storage. |
654 local_dest_dir = tempfile.mkdtemp() | 667 local_dest_dir = tempfile.mkdtemp() |
655 try: | 668 try: |
656 gs.download_dir_contents(source_bucket=bucket, source_dir=remote_dir, | 669 gs.download_dir_contents(source_bucket=bucket, source_dir=remote_dir, |
657 dest_dir=local_dest_dir) | 670 dest_dir=local_dest_dir) |
658 for filename in filenames: | 671 for filename in filenames: |
659 with open(os.path.join(local_dest_dir, subdir, filename)) as f: | 672 with open(os.path.join(local_dest_dir, subdir, filename)) as f: |
660 file_contents = f.read() | 673 file_contents = f.read() |
661 assert file_contents == 'contents of %s\n' % filename, ( | 674 assert file_contents == 'contents of %s\n' % filename, ( |
662 '%s == "contents of %s\n"' % (file_contents, filename)) | 675 '%s == "contents of %s\n"' % (file_contents, filename)) |
663 finally: | 676 finally: |
664 shutil.rmtree(local_dest_dir) | 677 shutil.rmtree(local_dest_dir) |
665 for filename in filenames: | 678 for filename in filenames: |
666 gs.delete_file(bucket=bucket, | 679 gs.delete_file(bucket=bucket, |
667 path=posixpath.join(remote_dir, subdir, filename)) | 680 path=posixpath.join(remote_dir, subdir, filename)) |
668 | 681 |
669 | 682 |
670 # TODO(epoger): How should we exercise these self-tests? | 683 # TODO(epoger): How should we exercise these self-tests? |
671 # See http://skbug.com/2751 | 684 # See http://skbug.com/2751 |
672 if __name__ == '__main__': | 685 if __name__ == '__main__': |
673 _test_public_read() | 686 _test_public_read() |
674 _test_authenticated_round_trip() | 687 _test_authenticated_round_trip() |
675 _test_dir_upload_and_download() | 688 _test_dir_upload_and_download() |
676 # TODO(epoger): Add _test_unauthenticated_access() to make sure we raise | 689 # TODO(epoger): Add _test_unauthenticated_access() to make sure we raise |
677 # an exception when we try to access without needed credentials. | 690 # an exception when we try to access without needed credentials. |
OLD | NEW |