Index: py/utils/gs_utils.py |
diff --git a/py/utils/gs_utils.py b/py/utils/gs_utils.py |
index c30295b7ea2f096acec73e0924f21df1a297da87..46c9bf17afe2aa9bc654135f02972e2f57a35320 100755 |
--- a/py/utils/gs_utils.py |
+++ b/py/utils/gs_utils.py |
@@ -40,88 +40,110 @@ from boto.s3.bucketlistresultset import BucketListResultSet |
from boto.s3.prefix import Prefix |
-def delete_file(bucket, path): |
- """Delete a single file within a GS bucket. |
- |
- TODO(epoger): what if bucket or path does not exist? Should probably raise |
- an exception. Implement, and add a test to exercise this. |
- |
- Params: |
- bucket: GS bucket to delete a file from |
- path: full path (Posix-style) of the file within the bucket to delete |
- """ |
- conn = _create_connection() |
- b = conn.get_bucket(bucket_name=bucket) |
- item = Key(b) |
- item.key = path |
- item.delete() |
- |
- |
-def upload_file(source_path, dest_bucket, dest_path): |
- """Upload contents of a local file to Google Storage. |
- |
- TODO(epoger): Add the extra parameters provided by upload_file() within |
- https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py , |
- so we can replace that function with this one. |
- |
- params: |
- source_path: full path (local-OS-style) on local disk to read from |
- dest_bucket: GCS bucket to copy the file to |
- dest_path: full path (Posix-style) within that bucket |
- """ |
- conn = _create_connection() |
- b = conn.get_bucket(bucket_name=dest_bucket) |
- item = Key(b) |
- item.key = dest_path |
- item.set_contents_from_filename(filename=source_path) |
- |
- |
-def download_file(source_bucket, source_path, dest_path, |
- create_subdirs_if_needed=False): |
- """ Downloads a single file from Google Cloud Storage to local disk. |
- |
- Args: |
- source_bucket: GCS bucket to download the file from |
- source_path: full path (Posix-style) within that bucket |
- dest_path: full path (local-OS-style) on local disk to copy the file to |
- create_subdirs_if_needed: boolean; whether to create subdirectories as |
- needed to create dest_path |
- """ |
- conn = _create_connection() |
- b = conn.get_bucket(bucket_name=source_bucket) |
- item = Key(b) |
- item.key = source_path |
- if create_subdirs_if_needed: |
- _makedirs_if_needed(os.path.dirname(dest_path)) |
- with open(dest_path, 'w') as f: |
- item.get_contents_to_file(fp=f) |
- |
- |
-def list_bucket_contents(bucket, subdir=None): |
- """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple. |
- |
- Args: |
- bucket: name of the Google Storage bucket |
- subdir: directory within the bucket to list, or None for root directory |
- """ |
- # The GS command relies on the prefix (if any) ending with a slash. |
- prefix = subdir or '' |
- if prefix and not prefix.endswith('/'): |
- prefix += '/' |
- prefix_length = len(prefix) if prefix else 0 |
- |
- conn = _create_connection() |
- b = conn.get_bucket(bucket_name=bucket) |
- lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') |
- dirs = [] |
- files = [] |
- for item in lister: |
- t = type(item) |
- if t is Key: |
- files.append(item.key[prefix_length:]) |
- elif t is Prefix: |
- dirs.append(item.name[prefix_length:-1]) |
- return (dirs, files) |
+class GSUtils(object): |
+ """Utilities for accessing Google Cloud Storage, using the boto library.""" |
+ |
+ def __init__(self, boto_file_path=os.path.join('~','.boto')): |
+ """Constructor. |
+ |
+ Params: |
+ boto_file_path: full path (local-OS-style) on local disk where .boto |
+ credentials file can be found. An exception is thrown if this file |
+ is missing. |
+ TODO(epoger): Change missing-file behavior: allow the caller to |
+ operate on public files in Google Storage. |
+ """ |
+ boto_file_path = os.path.expanduser(boto_file_path) |
+ print 'Reading boto file from %s' % boto_file_path |
+ boto_dict = _config_file_as_dict(filepath=boto_file_path) |
+ self._gs_access_key_id = boto_dict['gs_access_key_id'] |
+ self._gs_secret_access_key = boto_dict['gs_secret_access_key'] |
+ |
+ def delete_file(self, bucket, path): |
+ """Delete a single file within a GS bucket. |
+ |
+ TODO(epoger): what if bucket or path does not exist? Should probably raise |
+ an exception. Implement, and add a test to exercise this. |
+ |
+ Params: |
+ bucket: GS bucket to delete a file from |
+ path: full path (Posix-style) of the file within the bucket to delete |
+ """ |
+ conn = self._create_connection() |
+ b = conn.get_bucket(bucket_name=bucket) |
+ item = Key(b) |
+ item.key = path |
+ item.delete() |
+ |
+ def upload_file(self, source_path, dest_bucket, dest_path): |
+ """Upload contents of a local file to Google Storage. |
+ |
+ TODO(epoger): Add the extra parameters provided by upload_file() within |
+ https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py , |
+ so we can replace that function with this one. |
+ |
+    Params: |
+ source_path: full path (local-OS-style) on local disk to read from |
+ dest_bucket: GCS bucket to copy the file to |
+ dest_path: full path (Posix-style) within that bucket |
+ """ |
+ conn = self._create_connection() |
+ b = conn.get_bucket(bucket_name=dest_bucket) |
+ item = Key(b) |
+ item.key = dest_path |
+ item.set_contents_from_filename(filename=source_path) |
+ |
+ def download_file(self, source_bucket, source_path, dest_path, |
+ create_subdirs_if_needed=False): |
+ """Downloads a single file from Google Cloud Storage to local disk. |
+ |
+ Args: |
+ source_bucket: GCS bucket to download the file from |
+ source_path: full path (Posix-style) within that bucket |
+ dest_path: full path (local-OS-style) on local disk to copy the file to |
+ create_subdirs_if_needed: boolean; whether to create subdirectories as |
+ needed to create dest_path |
+ """ |
+ conn = self._create_connection() |
+ b = conn.get_bucket(bucket_name=source_bucket) |
+ item = Key(b) |
+ item.key = source_path |
+ if create_subdirs_if_needed: |
+ _makedirs_if_needed(os.path.dirname(dest_path)) |
+ with open(dest_path, 'w') as f: |
+ item.get_contents_to_file(fp=f) |
+ |
+ def list_bucket_contents(self, bucket, subdir=None): |
+ """Returns files in the Google Storage bucket as a (dirs, files) tuple. |
+ |
+ Args: |
+ bucket: name of the Google Storage bucket |
+ subdir: directory within the bucket to list, or None for root directory |
+ """ |
+ # The GS command relies on the prefix (if any) ending with a slash. |
+ prefix = subdir or '' |
+ if prefix and not prefix.endswith('/'): |
+ prefix += '/' |
+ prefix_length = len(prefix) if prefix else 0 |
+ |
+ conn = self._create_connection() |
+ b = conn.get_bucket(bucket_name=bucket) |
+ lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') |
+ dirs = [] |
+ files = [] |
+ for item in lister: |
+ t = type(item) |
+ if t is Key: |
+ files.append(item.key[prefix_length:]) |
+ elif t is Prefix: |
+ dirs.append(item.name[prefix_length:-1]) |
+ return (dirs, files) |
+ |
+ def _create_connection(self): |
+ """Returns a GSConnection object we can use to access Google Storage.""" |
+ return GSConnection( |
+ gs_access_key_id=self._gs_access_key_id, |
+ gs_secret_access_key=self._gs_secret_access_key) |
def _config_file_as_dict(filepath): |
@@ -148,29 +170,8 @@ def _config_file_as_dict(filepath): |
return dic |
-def _create_connection(boto_file_path=os.path.join('~','.boto')): |
- """Returns a GSConnection object we can use to access Google Storage. |
- |
- Params: |
- boto_file_path: full path (local-OS-style) on local disk where .boto |
- credentials file can be found |
- |
- TODO(epoger): Change this module to be object-based, where __init__() reads |
- the boto file into boto_dict once instead of repeatedly for each operation. |
- |
- TODO(epoger): if the file does not exist, rather than raising an exception, |
- create a GSConnection that can operate on public files. |
- """ |
- boto_file_path = os.path.expanduser(boto_file_path) |
- print 'Reading boto file from %s' % boto_file_path |
- boto_dict = _config_file_as_dict(filepath=boto_file_path) |
- return GSConnection( |
- gs_access_key_id=boto_dict['gs_access_key_id'], |
- gs_secret_access_key=boto_dict['gs_secret_access_key']) |
- |
- |
def _makedirs_if_needed(path): |
- """ Creates a directory (and any parent directories needed), if it does not |
+ """Creates a directory (and any parent directories needed), if it does not |
exist yet. |
Args: |
@@ -188,6 +189,7 @@ def _run_self_test(): |
remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) |
subdir = 'subdir' |
filenames_to_upload = ['file1', 'file2'] |
+ gs = GSUtils() |
# Upload test files to Google Storage. |
local_src_dir = tempfile.mkdtemp() |
@@ -196,46 +198,47 @@ def _run_self_test(): |
for filename in filenames_to_upload: |
with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: |
f.write('contents of %s\n' % filename) |
- upload_file(source_path=os.path.join(local_src_dir, subdir, filename), |
- dest_bucket=bucket, |
- dest_path=posixpath.join(remote_dir, subdir, filename)) |
+ gs.upload_file(source_path=os.path.join(local_src_dir, subdir, filename), |
+ dest_bucket=bucket, |
+ dest_path=posixpath.join(remote_dir, subdir, filename)) |
finally: |
shutil.rmtree(local_src_dir) |
# Get a list of the files we uploaded to Google Storage. |
- (dirs, files) = list_bucket_contents( |
+ (dirs, files) = gs.list_bucket_contents( |
bucket=bucket, subdir=remote_dir) |
- assert dirs == [subdir] |
- assert files == [] |
- (dirs, files) = list_bucket_contents( |
+ assert dirs == [subdir], '%s == [%s]' % (dirs, subdir) |
+ assert files == [], '%s == []' % files |
+ (dirs, files) = gs.list_bucket_contents( |
bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |
- assert dirs == [] |
- assert files == filenames_to_upload |
+ assert dirs == [], '%s == []' % dirs |
+ assert files == filenames_to_upload, '%s == %s' % (files, filenames_to_upload) |
# Download the files we uploaded to Google Storage, and validate contents. |
local_dest_dir = tempfile.mkdtemp() |
try: |
for filename in filenames_to_upload: |
- download_file(source_bucket=bucket, |
- source_path=posixpath.join(remote_dir, subdir, filename), |
- dest_path=os.path.join(local_dest_dir, subdir, filename), |
- create_subdirs_if_needed=True) |
+ gs.download_file(source_bucket=bucket, |
+ source_path=posixpath.join(remote_dir, subdir, filename), |
+ dest_path=os.path.join(local_dest_dir, subdir, filename), |
+ create_subdirs_if_needed=True) |
with open(os.path.join(local_dest_dir, subdir, filename)) as f: |
file_contents = f.read() |
- assert file_contents == 'contents of %s\n' % filename |
+ assert file_contents == 'contents of %s\n' % filename, ( |
+ '%s == "contents of %s\n"' % (file_contents, filename)) |
finally: |
shutil.rmtree(local_dest_dir) |
# Delete all the files we uploaded to Google Storage. |
for filename in filenames_to_upload: |
- delete_file(bucket=bucket, |
- path=posixpath.join(remote_dir, subdir, filename)) |
+ gs.delete_file(bucket=bucket, |
+ path=posixpath.join(remote_dir, subdir, filename)) |
# Confirm that we deleted all the files we uploaded to Google Storage. |
- (dirs, files) = list_bucket_contents( |
+ (dirs, files) = gs.list_bucket_contents( |
bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |
- assert dirs == [] |
- assert files == [] |
+ assert dirs == [], '%s == []' % dirs |
+ assert files == [], '%s == []' % files |
# TODO(epoger): How should we exercise this self-test? |