Index: py/utils/gs_utils.py |
diff --git a/py/utils/gs_utils.py b/py/utils/gs_utils.py |
index efab8ad96ed935f2f1ba91ef6d972cd1633bca52..c30295b7ea2f096acec73e0924f21df1a297da87 100755 |
--- a/py/utils/gs_utils.py |
+++ b/py/utils/gs_utils.py |
@@ -1,49 +1,79 @@ |
#!/usr/bin/python |
+# pylint: disable=C0301 |
""" |
Copyright 2014 Google Inc. |
Use of this source code is governed by a BSD-style license that can be |
found in the LICENSE file. |
-Utilities for accessing Google Cloud Storage. |
+Utilities for accessing Google Cloud Storage, using the boto library. |
+ |
+See http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial-using-boto.html |
+for implementation tips. |
""" |
+# pylint: enable=C0301 |
# System-level imports |
+import errno |
import os |
import posixpath |
+import random |
+import re |
+import shutil |
import sys |
+import tempfile |
# Imports from third-party code |
TRUNK_DIRECTORY = os.path.abspath(os.path.join( |
os.path.dirname(__file__), os.pardir, os.pardir)) |
-for import_subdir in ['google-api-python-client', 'httplib2', 'oauth2client', |
- 'uritemplate-py']: |
+for import_subdir in ['boto']: |
import_dirpath = os.path.join( |
TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) |
if import_dirpath not in sys.path: |
# We need to insert at the beginning of the path, to make sure that our |
# imported versions are favored over others that might be in the path. |
- # Also, the google-api-python-client checkout contains an empty |
- # oauth2client directory, which will confuse things unless we insert |
- # our checked-out oauth2client in front of it in the path. |
sys.path.insert(0, import_dirpath) |
-try: |
- from googleapiclient.discovery import build as build_service |
-except ImportError: |
- # We should not require any googleapiclient dependencies to be |
- # installed at a system level, but in the meanwhile, if developers run into |
- # trouble they can install those system-level dependencies to get unblocked. |
- print ('We should not require any googleapiclient dependencies to be ' |
- 'installed at a system level, but it seems like some are missing. ' |
- 'Please install google-api-python-client to get those dependencies; ' |
- 'directions can be found at https://developers.google.com/' |
- 'api-client-library/python/start/installation . ' |
- 'More details in http://skbug.com/2641 ') |
- raise |
- |
-# Local imports |
-import url_utils |
+from boto.gs.connection import GSConnection |
+from boto.gs.key import Key |
+from boto.s3.bucketlistresultset import BucketListResultSet |
+from boto.s3.prefix import Prefix |
+ |
+ |
+def delete_file(bucket, path): |
+ """Delete a single file within a GS bucket. |
+ |
+ TODO(epoger): what if bucket or path does not exist? Should probably raise |
+ an exception. Implement, and add a test to exercise this. |
+ |
+  Args:
+ bucket: GS bucket to delete a file from |
+ path: full path (Posix-style) of the file within the bucket to delete |
+ """ |
+ conn = _create_connection() |
+ b = conn.get_bucket(bucket_name=bucket) |
+ item = Key(b) |
+ item.key = path |
+ item.delete() |
+ |
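+# Hypothetical usage sketch (bucket and path names below are made up; assumes
+# a ~/.boto file granting delete permission):
+#   delete_file(bucket='my-bucket', path='logs/2014/run1.txt')
+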
+ |
+def upload_file(source_path, dest_bucket, dest_path): |
+ """Upload contents of a local file to Google Storage. |
+ |
+ TODO(epoger): Add the extra parameters provided by upload_file() within |
+ https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py , |
+ so we can replace that function with this one. |
+ |
+  Args:
+ source_path: full path (local-OS-style) on local disk to read from |
+ dest_bucket: GCS bucket to copy the file to |
+ dest_path: full path (Posix-style) within that bucket |
+ """ |
+ conn = _create_connection() |
+ b = conn.get_bucket(bucket_name=dest_bucket) |
+ item = Key(b) |
+ item.key = dest_path |
+ item.set_contents_from_filename(filename=source_path) |
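+
+# Hypothetical usage sketch (names are made up; assumes write access to the
+# bucket via ~/.boto), mirroring what _run_self_test() below does:
+#   upload_file(source_path='/tmp/report.txt',
+#               dest_bucket='my-bucket',
+#               dest_path='reports/report.txt')
+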
def download_file(source_bucket, source_path, dest_path, |
@@ -57,44 +87,162 @@ def download_file(source_bucket, source_path, dest_path, |
create_subdirs_if_needed: boolean; whether to create subdirectories as |
needed to create dest_path |
""" |
- source_http_url = posixpath.join( |
- 'http://storage.googleapis.com', source_bucket, source_path) |
- url_utils.copy_contents(source_url=source_http_url, dest_path=dest_path, |
- create_subdirs_if_needed=create_subdirs_if_needed) |
+ conn = _create_connection() |
+ b = conn.get_bucket(bucket_name=source_bucket) |
+ item = Key(b) |
+ item.key = source_path |
+ if create_subdirs_if_needed: |
+ _makedirs_if_needed(os.path.dirname(dest_path)) |
+  # Open in binary mode, so non-text objects are not corrupted on Windows.
+  with open(dest_path, 'wb') as f:
+ item.get_contents_to_file(fp=f) |
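+
+# Hypothetical usage sketch (bucket and paths are made up):
+#   download_file(source_bucket='my-bucket',
+#                 source_path='logs/run1.txt',
+#                 dest_path='/tmp/logs/run1.txt',
+#                 create_subdirs_if_needed=True)
+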
def list_bucket_contents(bucket, subdir=None): |
""" Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple. |
- Uses the API documented at |
- https://developers.google.com/storage/docs/json_api/v1/objects/list |
- |
Args: |
bucket: name of the Google Storage bucket |
subdir: directory within the bucket to list, or None for root directory |
""" |
- # The GCS command relies on the subdir name (if any) ending with a slash. |
- if subdir and not subdir.endswith('/'): |
- subdir += '/' |
- subdir_length = len(subdir) if subdir else 0 |
- |
- storage = build_service('storage', 'v1') |
- command = storage.objects().list( |
- bucket=bucket, delimiter='/', fields='items(name),prefixes', |
- prefix=subdir) |
- results = command.execute() |
- |
- # The GCS command returned two subdicts: |
- # prefixes: the full path of every directory within subdir, with trailing '/' |
- # items: property dict for each file object within subdir |
- # (including 'name', which is full path of the object) |
+  # Delimiter-based GS listing relies on the prefix (if any) ending with a
+  # slash.
+  prefix = subdir or ''
+  if prefix and not prefix.endswith('/'):
+    prefix += '/'
+  prefix_length = len(prefix)
+ |
+ conn = _create_connection() |
+ b = conn.get_bucket(bucket_name=bucket) |
+ lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') |
dirs = [] |
- for dir_fullpath in results.get('prefixes', []): |
- dir_basename = dir_fullpath[subdir_length:] |
- dirs.append(dir_basename[:-1]) # strip trailing slash |
files = [] |
- for file_properties in results.get('items', []): |
- file_fullpath = file_properties['name'] |
- file_basename = file_fullpath[subdir_length:] |
- files.append(file_basename) |
+ for item in lister: |
+    if isinstance(item, Key):
+      files.append(item.key[prefix_length:])
+    elif isinstance(item, Prefix):
+      dirs.append(item.name[prefix_length:-1])
return (dirs, files) |
+ |
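+# Hypothetical usage sketch (bucket/subdir names are made up): list one
+# directory level, getting subdirectory names and file names separately.
+#   dirs, files = list_bucket_contents(bucket='my-bucket', subdir='logs/2014')
+#   # e.g. dirs == ['subdir'], files == ['file1', 'file2']
+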
+ |
+def _config_file_as_dict(filepath): |
+ """Reads a boto-style config file into a dict. |
+ |
+ Parses all lines from the file of this form: key = value |
+ TODO(epoger): Create unittest. |
+ |
+  Args:
+    filepath: path to config file on local disk
+
+  Returns: contents of the config file, as a dictionary
+
+  Raises an IOError if the file does not exist.
+ """ |
+ dic = {} |
+  line_regex = re.compile(r'^\s*(\S+)\s*=\s*(\S+)\s*$')
+ with open(filepath) as f: |
+ for line in f: |
+ match = line_regex.match(line) |
+ if match: |
+ (key, value) = match.groups() |
+ dic[key] = value |
+ return dic |
+ |
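+# Illustrative example (made-up values): a file containing the lines
+#   gs_access_key_id = GOOGABCDEFG
+#   gs_secret_access_key = sekrit
+# would parse to
+#   {'gs_access_key_id': 'GOOGABCDEFG', 'gs_secret_access_key': 'sekrit'}
+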
+ |
+def _create_connection(boto_file_path=os.path.join('~', '.boto')):
+ """Returns a GSConnection object we can use to access Google Storage. |
+ |
+  Args:
+ boto_file_path: full path (local-OS-style) on local disk where .boto |
+ credentials file can be found |
+ |
+ TODO(epoger): Change this module to be object-based, where __init__() reads |
+ the boto file into boto_dict once instead of repeatedly for each operation. |
+ |
+ TODO(epoger): if the file does not exist, rather than raising an exception, |
+ create a GSConnection that can operate on public files. |
+ """ |
+ boto_file_path = os.path.expanduser(boto_file_path) |
+ print 'Reading boto file from %s' % boto_file_path |
+ boto_dict = _config_file_as_dict(filepath=boto_file_path) |
+ return GSConnection( |
+ gs_access_key_id=boto_dict['gs_access_key_id'], |
+ gs_secret_access_key=boto_dict['gs_secret_access_key']) |
+ |
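+# Hypothetical usage sketch: the default reads ~/.boto, but a caller (e.g. a
+# test) could point at its own credentials file:
+#   conn = _create_connection(boto_file_path='/path/to/test.boto')
+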
+ |
+def _makedirs_if_needed(path): |
+ """ Creates a directory (and any parent directories needed), if it does not |
+ exist yet. |
+ |
+ Args: |
+ path: full path of directory to create |
+ """ |
+ try: |
+ os.makedirs(path) |
+ except OSError as e: |
+ if e.errno != errno.EEXIST: |
+ raise |
+ |
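+# Hypothetical usage sketch (path is made up); safe to call repeatedly, since
+# an already-existing directory is not an error:
+#   _makedirs_if_needed('/tmp/gs_utils_test/subdir')
+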
+ |
+def _run_self_test(): |
+ bucket = 'chromium-skia-gm' |
+ remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) |
+ subdir = 'subdir' |
+ filenames_to_upload = ['file1', 'file2'] |
+ |
+ # Upload test files to Google Storage. |
+ local_src_dir = tempfile.mkdtemp() |
+ os.mkdir(os.path.join(local_src_dir, subdir)) |
+ try: |
+ for filename in filenames_to_upload: |
+ with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: |
+ f.write('contents of %s\n' % filename) |
+ upload_file(source_path=os.path.join(local_src_dir, subdir, filename), |
+ dest_bucket=bucket, |
+ dest_path=posixpath.join(remote_dir, subdir, filename)) |
+ finally: |
+ shutil.rmtree(local_src_dir) |
+ |
+ # Get a list of the files we uploaded to Google Storage. |
+ (dirs, files) = list_bucket_contents( |
+ bucket=bucket, subdir=remote_dir) |
+ assert dirs == [subdir] |
+ assert files == [] |
+ (dirs, files) = list_bucket_contents( |
+ bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |
+ assert dirs == [] |
+ assert files == filenames_to_upload |
+ |
+ # Download the files we uploaded to Google Storage, and validate contents. |
+ local_dest_dir = tempfile.mkdtemp() |
+ try: |
+ for filename in filenames_to_upload: |
+ download_file(source_bucket=bucket, |
+ source_path=posixpath.join(remote_dir, subdir, filename), |
+ dest_path=os.path.join(local_dest_dir, subdir, filename), |
+ create_subdirs_if_needed=True) |
+ with open(os.path.join(local_dest_dir, subdir, filename)) as f: |
+ file_contents = f.read() |
+ assert file_contents == 'contents of %s\n' % filename |
+ finally: |
+ shutil.rmtree(local_dest_dir) |
+ |
+ # Delete all the files we uploaded to Google Storage. |
+ for filename in filenames_to_upload: |
+ delete_file(bucket=bucket, |
+ path=posixpath.join(remote_dir, subdir, filename)) |
+ |
+ # Confirm that we deleted all the files we uploaded to Google Storage. |
+ (dirs, files) = list_bucket_contents( |
+ bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |
+ assert dirs == [] |
+ assert files == [] |
+ |
+ |
+# TODO(epoger): How should we exercise this self-test? |
+# I avoided using the standard unittest framework, because these Google Storage |
+# operations are expensive and require .boto permissions. |
+# |
+# How can we automatically test this code without wasting too many resources |
+# or needing .boto permissions? |
+if __name__ == '__main__': |
+ _run_self_test() |
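+
+# Manual invocation sketch (assumes a ~/.boto file with read/write access to
+# the chromium-skia-gm bucket):
+#   $ python py/utils/gs_utils.py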