Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(505)

Side by Side Diff: py/utils/gs_utils.py

Issue 387233003: gs_utils.py: use boto instead of google-api-python-client library so we can use .boto file credentials (Closed) Base URL: https://skia.googlesource.com/common.git@master
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « DEPS ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/python 1 #!/usr/bin/python
2 2
3 # pylint: disable=C0301
3 """ 4 """
4 Copyright 2014 Google Inc. 5 Copyright 2014 Google Inc.
5 6
6 Use of this source code is governed by a BSD-style license that can be 7 Use of this source code is governed by a BSD-style license that can be
7 found in the LICENSE file. 8 found in the LICENSE file.
8 9
9 Utilities for accessing Google Cloud Storage. 10 Utilities for accessing Google Cloud Storage, using the boto library.
11
12 See http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial-using-boto.html
13 for implementation tips.
10 """ 14 """
15 # pylint: enable=C0301
11 16
12 # System-level imports 17 # System-level imports
18 import errno
13 import os 19 import os
14 import posixpath 20 import posixpath
21 import random
22 import re
23 import shutil
15 import sys 24 import sys
25 import tempfile
16 26
17 # Imports from third-party code 27 # Imports from third-party code
18 TRUNK_DIRECTORY = os.path.abspath(os.path.join( 28 TRUNK_DIRECTORY = os.path.abspath(os.path.join(
19 os.path.dirname(__file__), os.pardir, os.pardir)) 29 os.path.dirname(__file__), os.pardir, os.pardir))
20 for import_subdir in ['google-api-python-client', 'httplib2', 'oauth2client', 30 for import_subdir in ['boto']:
21 'uritemplate-py']:
22 import_dirpath = os.path.join( 31 import_dirpath = os.path.join(
23 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) 32 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir)
24 if import_dirpath not in sys.path: 33 if import_dirpath not in sys.path:
25 # We need to insert at the beginning of the path, to make sure that our 34 # We need to insert at the beginning of the path, to make sure that our
26 # imported versions are favored over others that might be in the path. 35 # imported versions are favored over others that might be in the path.
27 # Also, the google-api-python-client checkout contains an empty
28 # oauth2client directory, which will confuse things unless we insert
29 # our checked-out oauth2client in front of it in the path.
30 sys.path.insert(0, import_dirpath) 36 sys.path.insert(0, import_dirpath)
31 try: 37 from boto.gs.connection import GSConnection
32 from googleapiclient.discovery import build as build_service 38 from boto.gs.key import Key
33 except ImportError: 39 from boto.s3.bucketlistresultset import BucketListResultSet
34 # We should not require any googleapiclient dependencies to be 40 from boto.s3.prefix import Prefix
35 # installed at a system level, but in the meanwhile, if developers run into
36 # trouble they can install those system-level dependencies to get unblocked.
37 print ('We should not require any googleapiclient dependencies to be '
38 'installed at a system level, but it seems like some are missing. '
39 'Please install google-api-python-client to get those dependencies; '
40 'directions can be found at https://developers.google.com/'
41 'api-client-library/python/start/installation . '
42 'More details in http://skbug.com/2641 ')
43 raise
44 41
45 # Local imports 42
def delete_file(bucket, path):
  """Delete a single file within a GS bucket.

  TODO(epoger): what if bucket or path does not exist? Should probably raise
  an exception. Implement, and add a test to exercise this.

  Params:
    bucket: GS bucket to delete a file from
    path: full path (Posix-style) of the file within the bucket to delete
  """
  connection = _create_connection()
  target_bucket = connection.get_bucket(bucket_name=bucket)
  # Wrap the path in a Key object so boto can issue the DELETE request.
  key = Key(target_bucket)
  key.key = path
  key.delete()
59
def upload_file(source_path, dest_bucket, dest_path):
  """Upload contents of a local file to Google Storage.

  TODO(epoger): Add the extra parameters provided by upload_file() within
  https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py ,
  so we can replace that function with this one.

  Params:
    source_path: full path (local-OS-style) on local disk to read from
    dest_bucket: GCS bucket to copy the file to
    dest_path: full path (Posix-style) within that bucket
  """
  connection = _create_connection()
  target_bucket = connection.get_bucket(bucket_name=dest_bucket)
  # A Key addresses one object within the bucket; boto streams the local
  # file's contents up to it.
  key = Key(target_bucket)
  key.key = dest_path
  key.set_contents_from_filename(filename=source_path)
47 77
48 78
def download_file(source_bucket, source_path, dest_path,
                  create_subdirs_if_needed=False):
  """ Downloads a single file from Google Cloud Storage to local disk.

  Args:
    source_bucket: GCS bucket to download the file from
    source_path: full path (Posix-style) within that bucket
    dest_path: full path (local-OS-style) on local disk to copy the file to
    create_subdirs_if_needed: boolean; whether to create subdirectories as
        needed to create dest_path
  """
  conn = _create_connection()
  b = conn.get_bucket(bucket_name=source_bucket)
  item = Key(b)
  item.key = source_path
  if create_subdirs_if_needed:
    _makedirs_if_needed(os.path.dirname(dest_path))
  # Open in binary mode: GS objects are arbitrary bytes, and text mode ('w')
  # would corrupt binary content via newline translation on Windows.
  with open(dest_path, 'wb') as f:
    item.get_contents_to_file(fp=f)
65 99
def list_bucket_contents(bucket, subdir=None):
  """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.

  Args:
    bucket: name of the Google Storage bucket
    subdir: directory within the bucket to list, or None for root directory
  """
  # boto's delimiter-based listing relies on the prefix (if any) ending in '/'.
  prefix = subdir or ''
  if prefix and not prefix.endswith('/'):
    prefix += '/'
  strip_count = len(prefix)

  connection = _create_connection()
  gs_bucket = connection.get_bucket(bucket_name=bucket)
  listing = BucketListResultSet(bucket=gs_bucket, prefix=prefix, delimiter='/')

  dirs = []
  files = []
  for entry in listing:
    entry_type = type(entry)
    if entry_type is Key:
      # A file object: report its path relative to the prefix.
      files.append(entry.key[strip_count:])
    elif entry_type is Prefix:
      # A "directory": strip the prefix and the trailing slash.
      dirs.append(entry.name[strip_count:-1])
  return (dirs, files)
87 # The GCS command returned two subdicts: 126
88 # prefixes: the full path of every directory within subdir, with trailing '/' 127 def _config_file_as_dict(filepath):
89 # items: property dict for each file object within subdir 128 """Reads a boto-style config file into a dict.
90 # (including 'name', which is full path of the object) 129
91 dirs = [] 130 Parses all lines from the file of this form: key = value
92 for dir_fullpath in results.get('prefixes', []): 131 TODO(epoger): Create unittest.
93 dir_basename = dir_fullpath[subdir_length:] 132
94 dirs.append(dir_basename[:-1]) # strip trailing slash 133 Params:
95 files = [] 134 filepath: path to config file on local disk
96 for file_properties in results.get('items', []): 135
97 file_fullpath = file_properties['name'] 136 Returns: contents of the config file, as a dictionary
98 file_basename = file_fullpath[subdir_length:] 137
99 files.append(file_basename) 138 Raises exception if file not found.
100 return (dirs, files) 139 """
140 dic = {}
141 line_regex = re.compile('^\s*(\S+)\s*=\s*(\S+)\s*$')
142 with open(filepath) as f:
143 for line in f:
144 match = line_regex.match(line)
145 if match:
146 (key, value) = match.groups()
147 dic[key] = value
148 return dic
149
150
151 def _create_connection(boto_file_path=os.path.join('~','.boto')):
152 """Returns a GSConnection object we can use to access Google Storage.
153
154 Params:
155 boto_file_path: full path (local-OS-style) on local disk where .boto
156 credentials file can be found
157
158 TODO(epoger): Change this module to be object-based, where __init__() reads
159 the boto file into boto_dict once instead of repeatedly for each operation.
160
161 TODO(epoger): if the file does not exist, rather than raising an exception,
162 create a GSConnection that can operate on public files.
163 """
164 boto_file_path = os.path.expanduser(boto_file_path)
165 print 'Reading boto file from %s' % boto_file_path
166 boto_dict = _config_file_as_dict(filepath=boto_file_path)
167 return GSConnection(
168 gs_access_key_id=boto_dict['gs_access_key_id'],
169 gs_secret_access_key=boto_dict['gs_secret_access_key'])
170
171
172 def _makedirs_if_needed(path):
173 """ Creates a directory (and any parent directories needed), if it does not
174 exist yet.
175
176 Args:
177 path: full path of directory to create
178 """
179 try:
180 os.makedirs(path)
181 except OSError as e:
182 if e.errno != errno.EEXIST:
183 raise
184
185
def _run_self_test():
  """Round-trip smoke test: upload, list, download, then delete files in GS."""
  bucket = 'chromium-skia-gm'
  remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint)
  subdir = 'subdir'
  filenames_to_upload = ['file1', 'file2']

  # Create local files and push them up to Google Storage.
  src_root = tempfile.mkdtemp()
  os.mkdir(os.path.join(src_root, subdir))
  try:
    for name in filenames_to_upload:
      local_path = os.path.join(src_root, subdir, name)
      with open(local_path, 'w') as f:
        f.write('contents of %s\n' % name)
      upload_file(source_path=local_path,
                  dest_bucket=bucket,
                  dest_path=posixpath.join(remote_dir, subdir, name))
  finally:
    shutil.rmtree(src_root)

  # Verify the listing at both directory levels.
  (dirs, files) = list_bucket_contents(
      bucket=bucket, subdir=remote_dir)
  assert dirs == [subdir]
  assert files == []
  (dirs, files) = list_bucket_contents(
      bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
  assert dirs == []
  assert files == filenames_to_upload

  # Pull each file back down and confirm its contents survived the round trip.
  dest_root = tempfile.mkdtemp()
  try:
    for name in filenames_to_upload:
      local_path = os.path.join(dest_root, subdir, name)
      download_file(source_bucket=bucket,
                    source_path=posixpath.join(remote_dir, subdir, name),
                    dest_path=local_path,
                    create_subdirs_if_needed=True)
      with open(local_path) as f:
        assert f.read() == 'contents of %s\n' % name
  finally:
    shutil.rmtree(dest_root)

  # Clean up: remove every uploaded file...
  for name in filenames_to_upload:
    delete_file(bucket=bucket,
                path=posixpath.join(remote_dir, subdir, name))

  # ...and confirm the remote directory is now empty.
  (dirs, files) = list_bucket_contents(
      bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
  assert dirs == []
  assert files == []


# TODO(epoger): How should we exercise this self-test?
# I avoided using the standard unittest framework, because these Google Storage
# operations are expensive and require .boto permissions.
#
# How can we automatically test this code without wasting too many resources
# or needing .boto permissions?
if __name__ == '__main__':
  _run_self_test()
OLDNEW
« no previous file with comments | « DEPS ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698