| Index: tools/pyutils/gs_utils.py
|
| diff --git a/tools/pyutils/gs_utils.py b/tools/pyutils/gs_utils.py
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..2659c03e63198996d22a295fa008053173b59042
|
| --- /dev/null
|
| +++ b/tools/pyutils/gs_utils.py
|
| @@ -0,0 +1,81 @@
|
| +#!/usr/bin/python
|
| +
|
| +"""
|
| +Copyright 2014 Google Inc.
|
| +
|
| +Use of this source code is governed by a BSD-style license that can be
|
| +found in the LICENSE file.
|
| +
|
| +Utilities for accessing Google Cloud Storage.
|
| +
|
| +TODO(epoger): move this into tools/utils for broader use?
|
| +"""
|
| +
|
| +# System-level imports
|
| +import os
|
| +import posixpath
|
| +import sys
|
| +try:
|
| + from apiclient.discovery import build as build_service
|
| +except ImportError:
|
| + print ('Missing google-api-python-client. Please install it; directions '
|
| + 'can be found at https://developers.google.com/api-client-library/'
|
| + 'python/start/installation')
|
| + raise
|
| +
|
| +# Local imports
|
| +import url_utils
|
| +
|
| +
|
| +def download_file(source_bucket, source_path, dest_path,
|
| + create_subdirs_if_needed=False):
|
| + """ Downloads a single file from Google Cloud Storage to local disk.
|
| +
|
| + Args:
|
| + source_bucket: GCS bucket to download the file from
|
| + source_path: full path (Posix-style) within that bucket
|
| + dest_path: full path (local-OS-style) on local disk to copy the file to
|
| + create_subdirs_if_needed: boolean; whether to create subdirectories as
|
| + needed to create dest_path
|
| + """
|
| + source_http_url = posixpath.join(
|
| + 'http://storage.googleapis.com', source_bucket, source_path)
|
| + url_utils.copy_contents(source_url=source_http_url, dest_path=dest_path,
|
| + create_subdirs_if_needed=create_subdirs_if_needed)
|
| +
|
| +
|
| +def list_bucket_contents(bucket, subdir=None):
|
| + """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.
|
| +
|
| + Uses the API documented at
|
| + https://developers.google.com/storage/docs/json_api/v1/objects/list
|
| +
|
| + Args:
|
| + bucket: name of the Google Storage bucket
|
| + subdir: directory within the bucket to list, or None for root directory
|
| + """
|
| + # The GCS command relies on the subdir name (if any) ending with a slash.
|
| + if subdir and not subdir.endswith('/'):
|
| + subdir += '/'
|
| + subdir_length = len(subdir) if subdir else 0
|
| +
|
| + storage = build_service('storage', 'v1')
|
| + command = storage.objects().list(
|
| + bucket=bucket, delimiter='/', fields='items(name),prefixes',
|
| + prefix=subdir)
|
| + results = command.execute()
|
| +
|
| + # The GCS command returned two subdicts:
|
| + # prefixes: the full path of every directory within subdir, with trailing '/'
|
| + # items: property dict for each file object within subdir
|
| + # (including 'name', which is full path of the object)
|
| + dirs = []
|
| + for dir_fullpath in results.get('prefixes', []):
|
| + dir_basename = dir_fullpath[subdir_length:]
|
| + dirs.append(dir_basename[:-1]) # strip trailing slash
|
| + files = []
|
| + for file_properties in results.get('items', []):
|
| + file_fullpath = file_properties['name']
|
| + file_basename = file_fullpath[subdir_length:]
|
| + files.append(file_basename)
|
| + return (dirs, files)
|
|
|