Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(254)

Side by Side Diff: py/utils/gs_utils.py

Issue 385013004: make gs_utils.py class-based, so credentials file can be read just once (Closed) Base URL: https://skia.googlesource.com/common.git@master
Patch Set: Ravi comment Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/python 1 #!/usr/bin/python
2 2
3 # pylint: disable=C0301 3 # pylint: disable=C0301
4 """ 4 """
5 Copyright 2014 Google Inc. 5 Copyright 2014 Google Inc.
6 6
7 Use of this source code is governed by a BSD-style license that can be 7 Use of this source code is governed by a BSD-style license that can be
8 found in the LICENSE file. 8 found in the LICENSE file.
9 9
10 Utilities for accessing Google Cloud Storage, using the boto library. 10 Utilities for accessing Google Cloud Storage, using the boto library.
(...skipping 22 matching lines...) Expand all
33 if import_dirpath not in sys.path: 33 if import_dirpath not in sys.path:
34 # We need to insert at the beginning of the path, to make sure that our 34 # We need to insert at the beginning of the path, to make sure that our
35 # imported versions are favored over others that might be in the path. 35 # imported versions are favored over others that might be in the path.
36 sys.path.insert(0, import_dirpath) 36 sys.path.insert(0, import_dirpath)
37 from boto.gs.connection import GSConnection 37 from boto.gs.connection import GSConnection
38 from boto.gs.key import Key 38 from boto.gs.key import Key
39 from boto.s3.bucketlistresultset import BucketListResultSet 39 from boto.s3.bucketlistresultset import BucketListResultSet
40 from boto.s3.prefix import Prefix 40 from boto.s3.prefix import Prefix
41 41
42 42
43 def delete_file(bucket, path): 43 class GSUtils(object):
44 """Delete a single file within a GS bucket. 44 """Utilities for accessing Google Cloud Storage, using the boto library."""
45 45
46 TODO(epoger): what if bucket or path does not exist? Should probably raise 46 def __init__(self, boto_file_path=os.path.join('~','.boto')):
47 an exception. Implement, and add a test to exercise this. 47 """Constructor.
48 48
49 Params: 49 Params:
50 bucket: GS bucket to delete a file from 50 boto_file_path: full path (local-OS-style) on local disk where .boto
51 path: full path (Posix-style) of the file within the bucket to delete 51 credentials file can be found. An exception is thrown if this file
52 """ 52 is missing.
53 conn = _create_connection() 53 TODO(epoger): Change missing-file behavior: allow the caller to
54 b = conn.get_bucket(bucket_name=bucket) 54 operate on public files in Google Storage.
55 item = Key(b) 55 """
56 item.key = path 56 boto_file_path = os.path.expanduser(boto_file_path)
57 item.delete() 57 print 'Reading boto file from %s' % boto_file_path
58 boto_dict = _config_file_as_dict(filepath=boto_file_path)
59 self._gs_access_key_id = boto_dict['gs_access_key_id']
60 self._gs_secret_access_key = boto_dict['gs_secret_access_key']
58 61
62 def delete_file(self, bucket, path):
63 """Delete a single file within a GS bucket.
59 64
60 def upload_file(source_path, dest_bucket, dest_path): 65 TODO(epoger): what if bucket or path does not exist? Should probably raise
61 """Upload contents of a local file to Google Storage. 66 an exception. Implement, and add a test to exercise this.
62 67
63 TODO(epoger): Add the extra parameters provided by upload_file() within 68 Params:
64 https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/u tils/old_gs_utils.py , 69 bucket: GS bucket to delete a file from
65 so we can replace that function with this one. 70 path: full path (Posix-style) of the file within the bucket to delete
71 """
72 conn = self._create_connection()
73 b = conn.get_bucket(bucket_name=bucket)
74 item = Key(b)
75 item.key = path
76 item.delete()
66 77
67 params: 78 def upload_file(self, source_path, dest_bucket, dest_path):
68 source_path: full path (local-OS-style) on local disk to read from 79 """Upload contents of a local file to Google Storage.
69 dest_bucket: GCS bucket to copy the file to
70 dest_path: full path (Posix-style) within that bucket
71 """
72 conn = _create_connection()
73 b = conn.get_bucket(bucket_name=dest_bucket)
74 item = Key(b)
75 item.key = dest_path
76 item.set_contents_from_filename(filename=source_path)
77 80
81 TODO(epoger): Add the extra parameters provided by upload_file() within
82 https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts /utils/old_gs_utils.py ,
83 so we can replace that function with this one.
78 84
79 def download_file(source_bucket, source_path, dest_path, 85 params:
80 create_subdirs_if_needed=False): 86 source_path: full path (local-OS-style) on local disk to read from
81 """ Downloads a single file from Google Cloud Storage to local disk. 87 dest_bucket: GCS bucket to copy the file to
88 dest_path: full path (Posix-style) within that bucket
89 """
90 conn = self._create_connection()
91 b = conn.get_bucket(bucket_name=dest_bucket)
92 item = Key(b)
93 item.key = dest_path
94 item.set_contents_from_filename(filename=source_path)
82 95
83 Args: 96 def download_file(self, source_bucket, source_path, dest_path,
84 source_bucket: GCS bucket to download the file from 97 create_subdirs_if_needed=False):
85 source_path: full path (Posix-style) within that bucket 98 """Downloads a single file from Google Cloud Storage to local disk.
86 dest_path: full path (local-OS-style) on local disk to copy the file to
87 create_subdirs_if_needed: boolean; whether to create subdirectories as
88 needed to create dest_path
89 """
90 conn = _create_connection()
91 b = conn.get_bucket(bucket_name=source_bucket)
92 item = Key(b)
93 item.key = source_path
94 if create_subdirs_if_needed:
95 _makedirs_if_needed(os.path.dirname(dest_path))
96 with open(dest_path, 'w') as f:
97 item.get_contents_to_file(fp=f)
98 99
100 Args:
101 source_bucket: GCS bucket to download the file from
102 source_path: full path (Posix-style) within that bucket
103 dest_path: full path (local-OS-style) on local disk to copy the file to
104 create_subdirs_if_needed: boolean; whether to create subdirectories as
105 needed to create dest_path
106 """
107 conn = self._create_connection()
108 b = conn.get_bucket(bucket_name=source_bucket)
109 item = Key(b)
110 item.key = source_path
111 if create_subdirs_if_needed:
112 _makedirs_if_needed(os.path.dirname(dest_path))
113 with open(dest_path, 'w') as f:
114 item.get_contents_to_file(fp=f)
99 115
100 def list_bucket_contents(bucket, subdir=None): 116 def list_bucket_contents(self, bucket, subdir=None):
101 """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple. 117 """Returns files in the Google Storage bucket as a (dirs, files) tuple.
102 118
103 Args: 119 Args:
104 bucket: name of the Google Storage bucket 120 bucket: name of the Google Storage bucket
105 subdir: directory within the bucket to list, or None for root directory 121 subdir: directory within the bucket to list, or None for root directory
106 """ 122 """
107 # The GS command relies on the prefix (if any) ending with a slash. 123 # The GS command relies on the prefix (if any) ending with a slash.
108 prefix = subdir or '' 124 prefix = subdir or ''
109 if prefix and not prefix.endswith('/'): 125 if prefix and not prefix.endswith('/'):
110 prefix += '/' 126 prefix += '/'
111 prefix_length = len(prefix) if prefix else 0 127 prefix_length = len(prefix) if prefix else 0
112 128
113 conn = _create_connection() 129 conn = self._create_connection()
114 b = conn.get_bucket(bucket_name=bucket) 130 b = conn.get_bucket(bucket_name=bucket)
115 lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') 131 lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
116 dirs = [] 132 dirs = []
117 files = [] 133 files = []
118 for item in lister: 134 for item in lister:
119 t = type(item) 135 t = type(item)
120 if t is Key: 136 if t is Key:
121 files.append(item.key[prefix_length:]) 137 files.append(item.key[prefix_length:])
122 elif t is Prefix: 138 elif t is Prefix:
123 dirs.append(item.name[prefix_length:-1]) 139 dirs.append(item.name[prefix_length:-1])
124 return (dirs, files) 140 return (dirs, files)
141
142 def _create_connection(self):
143 """Returns a GSConnection object we can use to access Google Storage."""
144 return GSConnection(
145 gs_access_key_id=self._gs_access_key_id,
146 gs_secret_access_key=self._gs_secret_access_key)
125 147
126 148
127 def _config_file_as_dict(filepath): 149 def _config_file_as_dict(filepath):
128 """Reads a boto-style config file into a dict. 150 """Reads a boto-style config file into a dict.
129 151
130 Parses all lines from the file of this form: key = value 152 Parses all lines from the file of this form: key = value
131 TODO(epoger): Create unittest. 153 TODO(epoger): Create unittest.
132 154
133 Params: 155 Params:
134 filepath: path to config file on local disk 156 filepath: path to config file on local disk
135 157
136 Returns: contents of the config file, as a dictionary 158 Returns: contents of the config file, as a dictionary
137 159
138 Raises exception if file not found. 160 Raises exception if file not found.
139 """ 161 """
140 dic = {} 162 dic = {}
141 line_regex = re.compile('^\s*(\S+)\s*=\s*(\S+)\s*$') 163 line_regex = re.compile('^\s*(\S+)\s*=\s*(\S+)\s*$')
142 with open(filepath) as f: 164 with open(filepath) as f:
143 for line in f: 165 for line in f:
144 match = line_regex.match(line) 166 match = line_regex.match(line)
145 if match: 167 if match:
146 (key, value) = match.groups() 168 (key, value) = match.groups()
147 dic[key] = value 169 dic[key] = value
148 return dic 170 return dic
149 171
150 172
151 def _create_connection(boto_file_path=os.path.join('~','.boto')):
152 """Returns a GSConnection object we can use to access Google Storage.
153
154 Params:
155 boto_file_path: full path (local-OS-style) on local disk where .boto
156 credentials file can be found
157
158 TODO(epoger): Change this module to be object-based, where __init__() reads
159 the boto file into boto_dict once instead of repeatedly for each operation.
160
161 TODO(epoger): if the file does not exist, rather than raising an exception,
162 create a GSConnection that can operate on public files.
163 """
164 boto_file_path = os.path.expanduser(boto_file_path)
165 print 'Reading boto file from %s' % boto_file_path
166 boto_dict = _config_file_as_dict(filepath=boto_file_path)
167 return GSConnection(
168 gs_access_key_id=boto_dict['gs_access_key_id'],
169 gs_secret_access_key=boto_dict['gs_secret_access_key'])
170
171
172 def _makedirs_if_needed(path): 173 def _makedirs_if_needed(path):
173 """ Creates a directory (and any parent directories needed), if it does not 174 """Creates a directory (and any parent directories needed), if it does not
174 exist yet. 175 exist yet.
175 176
176 Args: 177 Args:
177 path: full path of directory to create 178 path: full path of directory to create
178 """ 179 """
179 try: 180 try:
180 os.makedirs(path) 181 os.makedirs(path)
181 except OSError as e: 182 except OSError as e:
182 if e.errno != errno.EEXIST: 183 if e.errno != errno.EEXIST:
183 raise 184 raise
184 185
185 186
def _run_self_test():
  """Round-trips files through Google Storage: upload, list, download, delete.

  Expensive: performs real Google Storage operations and requires .boto
  credentials.
  """
  bucket = 'chromium-skia-gm'
  subdir = 'subdir'
  filenames_to_upload = ['file1', 'file2']
  remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint)
  gs = GSUtils()

  # Upload test files to Google Storage.
  src_dir = tempfile.mkdtemp()
  os.mkdir(os.path.join(src_dir, subdir))
  try:
    for name in filenames_to_upload:
      with open(os.path.join(src_dir, subdir, name), 'w') as f:
        f.write('contents of %s\n' % name)
      gs.upload_file(source_path=os.path.join(src_dir, subdir, name),
                     dest_bucket=bucket,
                     dest_path=posixpath.join(remote_dir, subdir, name))
  finally:
    shutil.rmtree(src_dir)

  # Get a list of the files we uploaded to Google Storage.
  (dirs, files) = gs.list_bucket_contents(bucket=bucket, subdir=remote_dir)
  assert dirs == [subdir], '%s == [%s]' % (dirs, subdir)
  assert files == [], '%s == []' % files
  (dirs, files) = gs.list_bucket_contents(
      bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
  assert dirs == [], '%s == []' % dirs
  assert files == filenames_to_upload, '%s == %s' % (files, filenames_to_upload)

  # Download the files we uploaded to Google Storage, and validate contents.
  dest_dir = tempfile.mkdtemp()
  try:
    for name in filenames_to_upload:
      gs.download_file(source_bucket=bucket,
                       source_path=posixpath.join(remote_dir, subdir, name),
                       dest_path=os.path.join(dest_dir, subdir, name),
                       create_subdirs_if_needed=True)
      with open(os.path.join(dest_dir, subdir, name)) as f:
        file_contents = f.read()
      assert file_contents == 'contents of %s\n' % name, (
          '%s == "contents of %s\n"' % (file_contents, name))
  finally:
    shutil.rmtree(dest_dir)

  # Delete all the files we uploaded to Google Storage.
  for name in filenames_to_upload:
    gs.delete_file(bucket=bucket,
                   path=posixpath.join(remote_dir, subdir, name))

  # Confirm that we deleted all the files we uploaded to Google Storage.
  (dirs, files) = gs.list_bucket_contents(
      bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
  assert dirs == [], '%s == []' % dirs
  assert files == [], '%s == []' % files
239 242
240 243
# TODO(epoger): How should we exercise this self-test?
# I avoided using the standard unittest framework, because these Google Storage
# operations are expensive and require .boto permissions.
#
# How can we automatically test this code without wasting too many resources
# or needing .boto permissions?
if __name__ == '__main__':
  _run_self_test()
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698