Chromium Code Reviews

Unified Diff: py/utils/gs_utils.py

Issue 387233003: gs_utils.py: use boto instead of google-api-python-client library so we can use .boto file credentials (Closed) Base URL: https://skia.googlesource.com/common.git@master
Patch Set: Created 6 years, 5 months ago
Index: py/utils/gs_utils.py
diff --git a/py/utils/gs_utils.py b/py/utils/gs_utils.py
index efab8ad96ed935f2f1ba91ef6d972cd1633bca52..c30295b7ea2f096acec73e0924f21df1a297da87 100755
--- a/py/utils/gs_utils.py
+++ b/py/utils/gs_utils.py
@@ -1,49 +1,79 @@
#!/usr/bin/python
+# pylint: disable=C0301
"""
Copyright 2014 Google Inc.
Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.
-Utilities for accessing Google Cloud Storage.
+Utilities for accessing Google Cloud Storage, using the boto library.
+
+See http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial-using-boto.html
+for implementation tips.
"""
+# pylint: enable=C0301
# System-level imports
+import errno
import os
import posixpath
+import random
+import re
+import shutil
import sys
+import tempfile
# Imports from third-party code
TRUNK_DIRECTORY = os.path.abspath(os.path.join(
os.path.dirname(__file__), os.pardir, os.pardir))
-for import_subdir in ['google-api-python-client', 'httplib2', 'oauth2client',
- 'uritemplate-py']:
+for import_subdir in ['boto']:
import_dirpath = os.path.join(
TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir)
if import_dirpath not in sys.path:
# We need to insert at the beginning of the path, to make sure that our
# imported versions are favored over others that might be in the path.
- # Also, the google-api-python-client checkout contains an empty
- # oauth2client directory, which will confuse things unless we insert
- # our checked-out oauth2client in front of it in the path.
sys.path.insert(0, import_dirpath)
-try:
- from googleapiclient.discovery import build as build_service
-except ImportError:
- # We should not require any googleapiclient dependencies to be
- # installed at a system level, but in the meanwhile, if developers run into
- # trouble they can install those system-level dependencies to get unblocked.
- print ('We should not require any googleapiclient dependencies to be '
- 'installed at a system level, but it seems like some are missing. '
- 'Please install google-api-python-client to get those dependencies; '
- 'directions can be found at https://developers.google.com/'
- 'api-client-library/python/start/installation . '
- 'More details in http://skbug.com/2641 ')
- raise
-
-# Local imports
-import url_utils
+from boto.gs.connection import GSConnection
+from boto.gs.key import Key
+from boto.s3.bucketlistresultset import BucketListResultSet
+from boto.s3.prefix import Prefix
+
+
+def delete_file(bucket, path):
+ """Delete a single file within a GS bucket.
+
+ TODO(epoger): what if bucket or path does not exist? Should probably raise
+ an exception. Implement, and add a test to exercise this.
+
+ Params:
+ bucket: GS bucket to delete a file from
+ path: full path (Posix-style) of the file within the bucket to delete
+ """
+ conn = _create_connection()
+ b = conn.get_bucket(bucket_name=bucket)
+ item = Key(b)
+ item.key = path
+ item.delete()
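
    For reference, a minimal usage sketch of delete_file(); the bucket and
    object path are hypothetical, and the call assumes py/utils is on
    PYTHONPATH and a readable ~/.boto credentials file:

        import gs_utils

        # Deletes a single object. As the TODO above notes, behavior for a
        # missing bucket or path is not yet pinned down.
        gs_utils.delete_file(bucket='example-bucket',
                             path='logs/stale-file.txt')
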
+
+
+def upload_file(source_path, dest_bucket, dest_path):
+ """Upload contents of a local file to Google Storage.
+
+ TODO(epoger): Add the extra parameters provided by upload_file() within
+ https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py ,
+ so we can replace that function with this one.
+
+  Params:
+    source_path: full path (local-OS-style) on local disk to read from
+    dest_bucket: GS bucket to copy the file to
+    dest_path: full path (Posix-style) within that bucket
+ """
+ conn = _create_connection()
+ b = conn.get_bucket(bucket_name=dest_bucket)
+ item = Key(b)
+ item.key = dest_path
+ item.set_contents_from_filename(filename=source_path)
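
    A usage sketch under the same assumptions (hypothetical bucket, valid
    ~/.boto). Note that source_path uses local-OS separators while dest_path
    is always Posix-style:

        import os
        import gs_utils

        gs_utils.upload_file(
            source_path=os.path.join('out', 'report.txt'),  # local-OS-style
            dest_bucket='example-bucket',
            dest_path='reports/2014/report.txt')            # Posix-style
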
def download_file(source_bucket, source_path, dest_path,
@@ -57,44 +87,162 @@ def download_file(source_bucket, source_path, dest_path,
create_subdirs_if_needed: boolean; whether to create subdirectories as
needed to create dest_path
"""
- source_http_url = posixpath.join(
- 'http://storage.googleapis.com', source_bucket, source_path)
- url_utils.copy_contents(source_url=source_http_url, dest_path=dest_path,
- create_subdirs_if_needed=create_subdirs_if_needed)
+ conn = _create_connection()
+ b = conn.get_bucket(bucket_name=source_bucket)
+ item = Key(b)
+ item.key = source_path
+ if create_subdirs_if_needed:
+ _makedirs_if_needed(os.path.dirname(dest_path))
+  # Write in binary mode so binary objects are not corrupted on Windows.
+  with open(dest_path, 'wb') as f:
+ item.get_contents_to_file(fp=f)
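
    A matching download sketch (same hypothetical names); with
    create_subdirs_if_needed=True, the local 'downloads' directory is created
    on demand:

        import os
        import gs_utils

        gs_utils.download_file(
            source_bucket='example-bucket',
            source_path='reports/2014/report.txt',
            dest_path=os.path.join('downloads', 'report.txt'),
            create_subdirs_if_needed=True)
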
def list_bucket_contents(bucket, subdir=None):
""" Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.
- Uses the API documented at
- https://developers.google.com/storage/docs/json_api/v1/objects/list
-
Args:
bucket: name of the Google Storage bucket
subdir: directory within the bucket to list, or None for root directory
"""
- # The GCS command relies on the subdir name (if any) ending with a slash.
- if subdir and not subdir.endswith('/'):
- subdir += '/'
- subdir_length = len(subdir) if subdir else 0
-
- storage = build_service('storage', 'v1')
- command = storage.objects().list(
- bucket=bucket, delimiter='/', fields='items(name),prefixes',
- prefix=subdir)
- results = command.execute()
-
- # The GCS command returned two subdicts:
- # prefixes: the full path of every directory within subdir, with trailing '/'
- # items: property dict for each file object within subdir
- # (including 'name', which is full path of the object)
+  # Paired with the '/' delimiter below, the listing treats the prefix as a
+  # directory only if it ends with a slash.
+  prefix = subdir or ''
+  if prefix and not prefix.endswith('/'):
+    prefix += '/'
+  prefix_length = len(prefix)
+
+ conn = _create_connection()
+ b = conn.get_bucket(bucket_name=bucket)
+ lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
dirs = []
- for dir_fullpath in results.get('prefixes', []):
- dir_basename = dir_fullpath[subdir_length:]
- dirs.append(dir_basename[:-1]) # strip trailing slash
files = []
- for file_properties in results.get('items', []):
- file_fullpath = file_properties['name']
- file_basename = file_fullpath[subdir_length:]
- files.append(file_basename)
+  for item in lister:
+    if isinstance(item, Key):
+      files.append(item.key[prefix_length:])
+    elif isinstance(item, Prefix):
+      # Prefix names arrive with a trailing slash; strip it.
+      dirs.append(item.name[prefix_length:-1])
return (dirs, files)
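
    A sketch of consuming the (dirs, files) return value; both lists hold
    names relative to subdir, with no trailing slashes on directory names
    (bucket and subdir are hypothetical):

        import posixpath
        import gs_utils

        dirs, files = gs_utils.list_bucket_contents('example-bucket',
                                                    subdir='reports')
        for d in dirs:
          print 'dir:  %s' % posixpath.join('reports', d)
        for f in files:
          print 'file: %s' % posixpath.join('reports', f)
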
+
+
+def _config_file_as_dict(filepath):
+ """Reads a boto-style config file into a dict.
+
+ Parses all lines from the file of this form: key = value
+ TODO(epoger): Create unittest.
+
+ Params:
+ filepath: path to config file on local disk
+
+ Returns: contents of the config file, as a dictionary
+
+  Raises an IOError if the file cannot be opened.
+ """
+ dic = {}
+  line_regex = re.compile(r'^\s*(\S+)\s*=\s*(\S+)\s*$')
+ with open(filepath) as f:
+ for line in f:
+ match = line_regex.match(line)
+ if match:
+ (key, value) = match.groups()
+ dic[key] = value
+ return dic
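
    Toward the unittest the TODO asks for, a self-contained sketch of the
    expected parse (fake credentials; a [Credentials] section header has no
    '=' so it does not match the key = value regex and is skipped):

        import os
        import tempfile
        from gs_utils import _config_file_as_dict

        tmp = tempfile.NamedTemporaryFile(suffix='.boto', delete=False)
        tmp.write('[Credentials]\n'
                  'gs_access_key_id = GOOGFAKEFAKEFAKEFAKE\n'
                  'gs_secret_access_key = fake/secret+key\n')
        tmp.close()
        assert _config_file_as_dict(tmp.name) == {
            'gs_access_key_id': 'GOOGFAKEFAKEFAKEFAKE',
            'gs_secret_access_key': 'fake/secret+key'}
        os.unlink(tmp.name)
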
+
+
+def _create_connection(boto_file_path=os.path.join('~', '.boto')):
+ """Returns a GSConnection object we can use to access Google Storage.
+
+ Params:
+ boto_file_path: full path (local-OS-style) on local disk where .boto
+ credentials file can be found
+
+ TODO(epoger): Change this module to be object-based, where __init__() reads
+ the boto file into boto_dict once instead of repeatedly for each operation.
+
+ TODO(epoger): if the file does not exist, rather than raising an exception,
+ create a GSConnection that can operate on public files.
+ """
+ boto_file_path = os.path.expanduser(boto_file_path)
+ print 'Reading boto file from %s' % boto_file_path
+ boto_dict = _config_file_as_dict(filepath=boto_file_path)
+ return GSConnection(
+ gs_access_key_id=boto_dict['gs_access_key_id'],
+ gs_secret_access_key=boto_dict['gs_secret_access_key'])
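
    Worth noting as an alternative (untested sketch): boto can read ~/.boto
    on its own, so the hand-rolled parsing above could potentially be
    replaced with boto's config handling:

        import boto

        # With no explicit keys, boto falls back to its config search path
        # (BOTO_CONFIG, /etc/boto.cfg, ~/.boto).
        conn = boto.connect_gs()
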
+
+
+def _makedirs_if_needed(path):
+ """ Creates a directory (and any parent directories needed), if it does not
+ exist yet.
+
+ Args:
+ path: full path of directory to create
+ """
+ try:
+ os.makedirs(path)
+ except OSError as e:
+ if e.errno != errno.EEXIST:
+ raise
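
    The EEXIST check makes the helper idempotent, so callers never need their
    own existence test; a quick sketch (path is arbitrary):

        import os
        import tempfile
        from gs_utils import _makedirs_if_needed

        target = os.path.join(tempfile.gettempdir(), 'a', 'b', 'c')
        _makedirs_if_needed(target)
        _makedirs_if_needed(target)  # second call is a harmless no-op
        assert os.path.isdir(target)
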
+
+
+def _run_self_test():
+ bucket = 'chromium-skia-gm'
+ remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint)
+ subdir = 'subdir'
+ filenames_to_upload = ['file1', 'file2']
+
+ # Upload test files to Google Storage.
+ local_src_dir = tempfile.mkdtemp()
+ os.mkdir(os.path.join(local_src_dir, subdir))
+ try:
+ for filename in filenames_to_upload:
+ with open(os.path.join(local_src_dir, subdir, filename), 'w') as f:
+ f.write('contents of %s\n' % filename)
+ upload_file(source_path=os.path.join(local_src_dir, subdir, filename),
+ dest_bucket=bucket,
+ dest_path=posixpath.join(remote_dir, subdir, filename))
+ finally:
+ shutil.rmtree(local_src_dir)
+
+ # Get a list of the files we uploaded to Google Storage.
+ (dirs, files) = list_bucket_contents(
+ bucket=bucket, subdir=remote_dir)
+ assert dirs == [subdir]
+ assert files == []
+ (dirs, files) = list_bucket_contents(
+ bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
+ assert dirs == []
+ assert files == filenames_to_upload
+
+ # Download the files we uploaded to Google Storage, and validate contents.
+ local_dest_dir = tempfile.mkdtemp()
+ try:
+ for filename in filenames_to_upload:
+ download_file(source_bucket=bucket,
+ source_path=posixpath.join(remote_dir, subdir, filename),
+ dest_path=os.path.join(local_dest_dir, subdir, filename),
+ create_subdirs_if_needed=True)
+ with open(os.path.join(local_dest_dir, subdir, filename)) as f:
+ file_contents = f.read()
+ assert file_contents == 'contents of %s\n' % filename
+ finally:
+ shutil.rmtree(local_dest_dir)
+
+ # Delete all the files we uploaded to Google Storage.
+ for filename in filenames_to_upload:
+ delete_file(bucket=bucket,
+ path=posixpath.join(remote_dir, subdir, filename))
+
+ # Confirm that we deleted all the files we uploaded to Google Storage.
+ (dirs, files) = list_bucket_contents(
+ bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
+ assert dirs == []
+ assert files == []
+
+
+# TODO(epoger): How should we exercise this self-test?
+# I avoided using the standard unittest framework, because these Google Storage
+# operations are expensive and require .boto permissions.
+#
+# How can we automatically test this code without wasting too many resources
+# or needing .boto permissions?
+if __name__ == '__main__':
+ _run_self_test()