OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
| 3 # pylint: disable=C0301 |
3 """ | 4 """ |
4 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
5 | 6 |
6 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
7 found in the LICENSE file. | 8 found in the LICENSE file. |
8 | 9 |
9 Utilities for accessing Google Cloud Storage. | 10 Utilities for accessing Google Cloud Storage, using the boto library. |
| 11 |
| 12 See http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial
-using-boto.html |
| 13 for implementation tips. |
10 """ | 14 """ |
| 15 # pylint: enable=C0301 |
11 | 16 |
12 # System-level imports | 17 # System-level imports |
| 18 import errno |
13 import os | 19 import os |
14 import posixpath | 20 import posixpath |
| 21 import random |
| 22 import re |
| 23 import shutil |
15 import sys | 24 import sys |
| 25 import tempfile |
16 | 26 |
17 # Imports from third-party code | 27 # Imports from third-party code |
18 TRUNK_DIRECTORY = os.path.abspath(os.path.join( | 28 TRUNK_DIRECTORY = os.path.abspath(os.path.join( |
19 os.path.dirname(__file__), os.pardir, os.pardir)) | 29 os.path.dirname(__file__), os.pardir, os.pardir)) |
20 for import_subdir in ['google-api-python-client', 'httplib2', 'oauth2client', | 30 for import_subdir in ['boto']: |
21 'uritemplate-py']: | |
22 import_dirpath = os.path.join( | 31 import_dirpath = os.path.join( |
23 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) | 32 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) |
24 if import_dirpath not in sys.path: | 33 if import_dirpath not in sys.path: |
25 # We need to insert at the beginning of the path, to make sure that our | 34 # We need to insert at the beginning of the path, to make sure that our |
26 # imported versions are favored over others that might be in the path. | 35 # imported versions are favored over others that might be in the path. |
27 # Also, the google-api-python-client checkout contains an empty | |
28 # oauth2client directory, which will confuse things unless we insert | |
29 # our checked-out oauth2client in front of it in the path. | |
30 sys.path.insert(0, import_dirpath) | 36 sys.path.insert(0, import_dirpath) |
31 try: | 37 from boto.gs.connection import GSConnection |
32 from googleapiclient.discovery import build as build_service | 38 from boto.gs.key import Key |
33 except ImportError: | 39 from boto.s3.bucketlistresultset import BucketListResultSet |
34 # We should not require any googleapiclient dependencies to be | 40 from boto.s3.prefix import Prefix |
35 # installed at a system level, but in the meanwhile, if developers run into | |
36 # trouble they can install those system-level dependencies to get unblocked. | |
37 print ('We should not require any googleapiclient dependencies to be ' | |
38 'installed at a system level, but it seems like some are missing. ' | |
39 'Please install google-api-python-client to get those dependencies; ' | |
40 'directions can be found at https://developers.google.com/' | |
41 'api-client-library/python/start/installation . ' | |
42 'More details in http://skbug.com/2641 ') | |
43 raise | |
44 | 41 |
45 # Local imports | 42 |
def delete_file(bucket, path):
  """Delete a single file within a GS bucket.

  TODO(epoger): what if bucket or path does not exist? Should probably raise
  an exception. Implement, and add a test to exercise this.

  Params:
    bucket: GS bucket to delete a file from
    path: full path (Posix-style) of the file within the bucket to delete
  """
  connection = _create_connection()
  gs_bucket = connection.get_bucket(bucket_name=bucket)
  key = Key(gs_bucket)
  key.key = path
  key.delete()
| 58 |
| 59 |
def upload_file(source_path, dest_bucket, dest_path):
  """Upload contents of a local file to Google Storage.

  TODO(epoger): Add the extra parameters provided by upload_file() within
  https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py ,
  so we can replace that function with this one.

  params:
    source_path: full path (local-OS-style) on local disk to read from
    dest_bucket: GCS bucket to copy the file to
    dest_path: full path (Posix-style) within that bucket
  """
  connection = _create_connection()
  gs_bucket = connection.get_bucket(bucket_name=dest_bucket)
  key = Key(gs_bucket)
  key.key = dest_path
  key.set_contents_from_filename(filename=source_path)
47 | 77 |
48 | 78 |
def download_file(source_bucket, source_path, dest_path,
                  create_subdirs_if_needed=False):
  """ Downloads a single file from Google Cloud Storage to local disk.

  Args:
    source_bucket: GCS bucket to download the file from
    source_path: full path (Posix-style) within that bucket
    dest_path: full path (local-OS-style) on local disk to copy the file to
    create_subdirs_if_needed: boolean; whether to create subdirectories as
        needed to create dest_path
  """
  conn = _create_connection()
  b = conn.get_bucket(bucket_name=source_bucket)
  item = Key(b)
  item.key = source_path
  if create_subdirs_if_needed:
    _makedirs_if_needed(os.path.dirname(dest_path))
  # Open in binary mode: GCS objects are arbitrary bytes, and text mode
  # would corrupt binary content (e.g. newline translation on Windows).
  with open(dest_path, 'wb') as f:
    item.get_contents_to_file(fp=f)
64 | 98 |
65 | 99 |
def list_bucket_contents(bucket, subdir=None):
  """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.

  Args:
    bucket: name of the Google Storage bucket
    subdir: directory within the bucket to list, or None for root directory
  """
  # The GS listing treats a trailing slash on the prefix as the directory
  # delimiter, so make sure any non-empty prefix ends with one.
  prefix = subdir or ''
  if prefix and not prefix.endswith('/'):
    prefix += '/'
  prefix_length = len(prefix)

  connection = _create_connection()
  gs_bucket = connection.get_bucket(bucket_name=bucket)
  dirs = []
  files = []
  for item in BucketListResultSet(bucket=gs_bucket, prefix=prefix,
                                  delimiter='/'):
    item_type = type(item)
    if item_type is Key:
      # A Key is a file; report its path relative to the prefix.
      files.append(item.key[prefix_length:])
    elif item_type is Prefix:
      # A Prefix is a subdirectory; strip the prefix and the trailing slash.
      dirs.append(item.name[prefix_length:-1])
  return (dirs, files)
86 | 125 |
87 # The GCS command returned two subdicts: | 126 |
88 # prefixes: the full path of every directory within subdir, with trailing '/' | 127 def _config_file_as_dict(filepath): |
89 # items: property dict for each file object within subdir | 128 """Reads a boto-style config file into a dict. |
90 # (including 'name', which is full path of the object) | 129 |
91 dirs = [] | 130 Parses all lines from the file of this form: key = value |
92 for dir_fullpath in results.get('prefixes', []): | 131 TODO(epoger): Create unittest. |
93 dir_basename = dir_fullpath[subdir_length:] | 132 |
94 dirs.append(dir_basename[:-1]) # strip trailing slash | 133 Params: |
95 files = [] | 134 filepath: path to config file on local disk |
96 for file_properties in results.get('items', []): | 135 |
97 file_fullpath = file_properties['name'] | 136 Returns: contents of the config file, as a dictionary |
98 file_basename = file_fullpath[subdir_length:] | 137 |
99 files.append(file_basename) | 138 Raises exception if file not found. |
100 return (dirs, files) | 139 """ |
| 140 dic = {} |
| 141 line_regex = re.compile('^\s*(\S+)\s*=\s*(\S+)\s*$') |
| 142 with open(filepath) as f: |
| 143 for line in f: |
| 144 match = line_regex.match(line) |
| 145 if match: |
| 146 (key, value) = match.groups() |
| 147 dic[key] = value |
| 148 return dic |
| 149 |
| 150 |
def _create_connection(boto_file_path=os.path.join('~', '.boto')):
  """Returns a GSConnection object we can use to access Google Storage.

  Params:
    boto_file_path: full path (local-OS-style) on local disk where .boto
        credentials file can be found

  TODO(epoger): Change this module to be object-based, where __init__() reads
  the boto file into boto_dict once instead of repeatedly for each operation.

  TODO(epoger): if the file does not exist, rather than raising an exception,
  create a GSConnection that can operate on public files.
  """
  boto_file_path = os.path.expanduser(boto_file_path)
  # Parenthesized print: identical output under Python 2 (prints the single
  # expression) while also being valid Python 3 syntax.
  print('Reading boto file from %s' % boto_file_path)
  boto_dict = _config_file_as_dict(filepath=boto_file_path)
  return GSConnection(
      gs_access_key_id=boto_dict['gs_access_key_id'],
      gs_secret_access_key=boto_dict['gs_secret_access_key'])
| 170 |
| 171 |
| 172 def _makedirs_if_needed(path): |
| 173 """ Creates a directory (and any parent directories needed), if it does not |
| 174 exist yet. |
| 175 |
| 176 Args: |
| 177 path: full path of directory to create |
| 178 """ |
| 179 try: |
| 180 os.makedirs(path) |
| 181 except OSError as e: |
| 182 if e.errno != errno.EEXIST: |
| 183 raise |
| 184 |
| 185 |
def _run_self_test():
  """Round-trip self-test: upload a few files to a randomly named remote
  directory, list them, download and verify them, then delete them.
  """
  bucket = 'chromium-skia-gm'
  remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint)
  subdir = 'subdir'
  filenames_to_upload = ['file1', 'file2']

  # Upload test files to Google Storage.
  local_src_dir = tempfile.mkdtemp()
  os.mkdir(os.path.join(local_src_dir, subdir))
  try:
    for filename in filenames_to_upload:
      local_path = os.path.join(local_src_dir, subdir, filename)
      with open(local_path, 'w') as f:
        f.write('contents of %s\n' % filename)
      upload_file(source_path=local_path,
                  dest_bucket=bucket,
                  dest_path=posixpath.join(remote_dir, subdir, filename))
  finally:
    shutil.rmtree(local_src_dir)

  # Get a list of the files we uploaded to Google Storage.
  (dirs, files) = list_bucket_contents(bucket=bucket, subdir=remote_dir)
  assert dirs == [subdir]
  assert files == []
  (dirs, files) = list_bucket_contents(
      bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
  assert dirs == []
  assert files == filenames_to_upload

  # Download the files we uploaded to Google Storage, and validate contents.
  local_dest_dir = tempfile.mkdtemp()
  try:
    for filename in filenames_to_upload:
      dest_path = os.path.join(local_dest_dir, subdir, filename)
      download_file(source_bucket=bucket,
                    source_path=posixpath.join(remote_dir, subdir, filename),
                    dest_path=dest_path,
                    create_subdirs_if_needed=True)
      with open(dest_path) as f:
        assert f.read() == 'contents of %s\n' % filename
  finally:
    shutil.rmtree(local_dest_dir)

  # Delete all the files we uploaded to Google Storage.
  for filename in filenames_to_upload:
    delete_file(bucket=bucket,
                path=posixpath.join(remote_dir, subdir, filename))

  # Confirm that we deleted all the files we uploaded to Google Storage.
  (dirs, files) = list_bucket_contents(
      bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
  assert dirs == []
  assert files == []
| 239 |
| 240 |
# TODO(epoger): How should we exercise this self-test?
# The standard unittest framework is deliberately avoided here because these
# Google Storage operations are expensive and require .boto permissions.
#
# How can we automatically test this code without wasting too many resources
# or needing .boto permissions?
if __name__ == '__main__':
  _run_self_test()
OLD | NEW |