Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(246)

Side by Side Diff: py/utils/gs_utils.py

Issue 385013004: make gs_utils.py class-based, so credentials file can be read just once (Closed) Base URL: https://skia.googlesource.com/common.git@master
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/python 1 #!/usr/bin/python
2 2
3 # pylint: disable=C0301 3 # pylint: disable=C0301
4 """ 4 """
5 Copyright 2014 Google Inc. 5 Copyright 2014 Google Inc.
6 6
7 Use of this source code is governed by a BSD-style license that can be 7 Use of this source code is governed by a BSD-style license that can be
8 found in the LICENSE file. 8 found in the LICENSE file.
9 9
10 Utilities for accessing Google Cloud Storage, using the boto library. 10 Utilities for accessing Google Cloud Storage, using the boto library.
(...skipping 22 matching lines...) Expand all
33 if import_dirpath not in sys.path: 33 if import_dirpath not in sys.path:
34 # We need to insert at the beginning of the path, to make sure that our 34 # We need to insert at the beginning of the path, to make sure that our
35 # imported versions are favored over others that might be in the path. 35 # imported versions are favored over others that might be in the path.
36 sys.path.insert(0, import_dirpath) 36 sys.path.insert(0, import_dirpath)
37 from boto.gs.connection import GSConnection 37 from boto.gs.connection import GSConnection
38 from boto.gs.key import Key 38 from boto.gs.key import Key
39 from boto.s3.bucketlistresultset import BucketListResultSet 39 from boto.s3.bucketlistresultset import BucketListResultSet
40 from boto.s3.prefix import Prefix 40 from boto.s3.prefix import Prefix
41 41
42 42
43 def delete_file(bucket, path): 43 class GSUtils(object):
44 """Delete a single file within a GS bucket. 44 """Utilities for accessing Google Cloud Storage, using the boto library."""
45 45
46 TODO(epoger): what if bucket or path does not exist? Should probably raise 46 def __init__(self, boto_file_path=os.path.join('~','.boto')):
epoger 2014/07/14 15:36:42 the constructor is new, and includes a chunk of _c
47 an exception. Implement, and add a test to exercise this. 47 """Constructor.
48 48
49 Params: 49 Params:
50 bucket: GS bucket to delete a file from 50 boto_file_path: full path (local-OS-style) on local disk where .boto
51 path: full path (Posix-style) of the file within the bucket to delete 51 credentials file can be found
52 """
53 conn = _create_connection()
54 b = conn.get_bucket(bucket_name=bucket)
55 item = Key(b)
56 item.key = path
57 item.delete()
58 52
53 TODO(epoger): if the file does not exist, rather than raising an exception,
54 allow the caller to operate on public files.
rmistry 2014/07/14 17:41:50 Nit: Maybe reword this a little- boto_file_path:
55 """
56 boto_file_path = os.path.expanduser(boto_file_path)
57 print 'Reading boto file from %s' % boto_file_path
58 boto_dict = _config_file_as_dict(filepath=boto_file_path)
59 self._gs_access_key_id = boto_dict['gs_access_key_id']
60 self._gs_secret_access_key = boto_dict['gs_secret_access_key']
59 61
60 def upload_file(source_path, dest_bucket, dest_path): 62 def delete_file(self, bucket, path):
epoger 2014/07/14 15:36:42 moved from line 43
61 """Upload contents of a local file to Google Storage. 63 """Delete a single file within a GS bucket.
62 64
63 TODO(epoger): Add the extra parameters provided by upload_file() within 65 TODO(epoger): what if bucket or path does not exist? Should probably raise
64 https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py , 66 an exception. Implement, and add a test to exercise this.
65 so we can replace that function with this one.
66 67
67 params: 68 Params:
68 source_path: full path (local-OS-style) on local disk to read from 69 bucket: GS bucket to delete a file from
69 dest_bucket: GCS bucket to copy the file to 70 path: full path (Posix-style) of the file within the bucket to delete
70 dest_path: full path (Posix-style) within that bucket 71 """
71 """ 72 conn = self._create_connection()
72 conn = _create_connection() 73 b = conn.get_bucket(bucket_name=bucket)
73 b = conn.get_bucket(bucket_name=dest_bucket) 74 item = Key(b)
74 item = Key(b) 75 item.key = path
75 item.key = dest_path 76 item.delete()
76 item.set_contents_from_filename(filename=source_path)
77 77
78 def upload_file(self, source_path, dest_bucket, dest_path):
epoger 2014/07/14 15:36:42 moved from line 60
79 """Upload contents of a local file to Google Storage.
78 80
79 def download_file(source_bucket, source_path, dest_path, 81 TODO(epoger): Add the extra parameters provided by upload_file() within
80 create_subdirs_if_needed=False): 82 https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py ,
81 """ Downloads a single file from Google Cloud Storage to local disk. 83 so we can replace that function with this one.
82 84
83 Args: 85 params:
84 source_bucket: GCS bucket to download the file from 86 source_path: full path (local-OS-style) on local disk to read from
85 source_path: full path (Posix-style) within that bucket 87 dest_bucket: GCS bucket to copy the file to
86 dest_path: full path (local-OS-style) on local disk to copy the file to 88 dest_path: full path (Posix-style) within that bucket
87 create_subdirs_if_needed: boolean; whether to create subdirectories as 89 """
88 needed to create dest_path 90 conn = self._create_connection()
89 """ 91 b = conn.get_bucket(bucket_name=dest_bucket)
90 conn = _create_connection() 92 item = Key(b)
91 b = conn.get_bucket(bucket_name=source_bucket) 93 item.key = dest_path
92 item = Key(b) 94 item.set_contents_from_filename(filename=source_path)
93 item.key = source_path
94 if create_subdirs_if_needed:
95 _makedirs_if_needed(os.path.dirname(dest_path))
96 with open(dest_path, 'w') as f:
97 item.get_contents_to_file(fp=f)
98 95
96 def download_file(self, source_bucket, source_path, dest_path,
epoger 2014/07/14 15:36:41 moved from line 79
97 create_subdirs_if_needed=False):
98 """Downloads a single file from Google Cloud Storage to local disk.
99 99
100 def list_bucket_contents(bucket, subdir=None): 100 Args:
101 """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple. 101 source_bucket: GCS bucket to download the file from
102 source_path: full path (Posix-style) within that bucket
103 dest_path: full path (local-OS-style) on local disk to copy the file to
104 create_subdirs_if_needed: boolean; whether to create subdirectories as
105 needed to create dest_path
106 """
107 conn = self._create_connection()
108 b = conn.get_bucket(bucket_name=source_bucket)
109 item = Key(b)
110 item.key = source_path
111 if create_subdirs_if_needed:
112 _makedirs_if_needed(os.path.dirname(dest_path))
113 with open(dest_path, 'w') as f:
114 item.get_contents_to_file(fp=f)
102 115
103 Args: 116 def list_bucket_contents(self, bucket, subdir=None):
epoger 2014/07/14 15:36:42 moved from line 100
104 bucket: name of the Google Storage bucket 117 """Returns files in the Google Storage bucket as a (dirs, files) tuple.
105 subdir: directory within the bucket to list, or None for root directory
106 """
107 # The GS command relies on the prefix (if any) ending with a slash.
108 prefix = subdir or ''
109 if prefix and not prefix.endswith('/'):
110 prefix += '/'
111 prefix_length = len(prefix) if prefix else 0
112 118
113 conn = _create_connection() 119 Args:
114 b = conn.get_bucket(bucket_name=bucket) 120 bucket: name of the Google Storage bucket
115 lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') 121 subdir: directory within the bucket to list, or None for root directory
116 dirs = [] 122 """
117 files = [] 123 # The GS command relies on the prefix (if any) ending with a slash.
118 for item in lister: 124 prefix = subdir or ''
119 t = type(item) 125 if prefix and not prefix.endswith('/'):
120 if t is Key: 126 prefix += '/'
121 files.append(item.key[prefix_length:]) 127 prefix_length = len(prefix) if prefix else 0
122 elif t is Prefix: 128
123 dirs.append(item.name[prefix_length:-1]) 129 conn = self._create_connection()
124 return (dirs, files) 130 b = conn.get_bucket(bucket_name=bucket)
131 lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
132 dirs = []
133 files = []
134 for item in lister:
135 t = type(item)
136 if t is Key:
137 files.append(item.key[prefix_length:])
138 elif t is Prefix:
139 dirs.append(item.name[prefix_length:-1])
140 return (dirs, files)
141
142 def _create_connection(self):
epoger 2014/07/14 15:36:42 moved from line 151
143 """Returns a GSConnection object we can use to access Google Storage."""
144 return GSConnection(
145 gs_access_key_id=self._gs_access_key_id,
146 gs_secret_access_key=self._gs_secret_access_key)
125 147
126 148
127 def _config_file_as_dict(filepath): 149 def _config_file_as_dict(filepath):
128 """Reads a boto-style config file into a dict. 150 """Reads a boto-style config file into a dict.
129 151
130 Parses all lines from the file of this form: key = value 152 Parses all lines from the file of this form: key = value
131 TODO(epoger): Create unittest. 153 TODO(epoger): Create unittest.
132 154
133 Params: 155 Params:
134 filepath: path to config file on local disk 156 filepath: path to config file on local disk
135 157
136 Returns: contents of the config file, as a dictionary 158 Returns: contents of the config file, as a dictionary
137 159
138 Raises exception if file not found. 160 Raises exception if file not found.
139 """ 161 """
140 dic = {} 162 dic = {}
141 line_regex = re.compile('^\s*(\S+)\s*=\s*(\S+)\s*$') 163 line_regex = re.compile('^\s*(\S+)\s*=\s*(\S+)\s*$')
142 with open(filepath) as f: 164 with open(filepath) as f:
143 for line in f: 165 for line in f:
144 match = line_regex.match(line) 166 match = line_regex.match(line)
145 if match: 167 if match:
146 (key, value) = match.groups() 168 (key, value) = match.groups()
147 dic[key] = value 169 dic[key] = value
148 return dic 170 return dic
149 171
150 172
151 def _create_connection(boto_file_path=os.path.join('~','.boto')):
152 """Returns a GSConnection object we can use to access Google Storage.
153
154 Params:
155 boto_file_path: full path (local-OS-style) on local disk where .boto
156 credentials file can be found
157
158 TODO(epoger): Change this module to be object-based, where __init__() reads
159 the boto file into boto_dict once instead of repeatedly for each operation.
160
161 TODO(epoger): if the file does not exist, rather than raising an exception,
162 create a GSConnection that can operate on public files.
163 """
164 boto_file_path = os.path.expanduser(boto_file_path)
165 print 'Reading boto file from %s' % boto_file_path
166 boto_dict = _config_file_as_dict(filepath=boto_file_path)
167 return GSConnection(
168 gs_access_key_id=boto_dict['gs_access_key_id'],
169 gs_secret_access_key=boto_dict['gs_secret_access_key'])
170
171
172 def _makedirs_if_needed(path): 173 def _makedirs_if_needed(path):
173 """ Creates a directory (and any parent directories needed), if it does not 174 """Creates a directory (and any parent directories needed), if it does not
174 exist yet. 175 exist yet.
175 176
176 Args: 177 Args:
177 path: full path of directory to create 178 path: full path of directory to create
178 """ 179 """
179 try: 180 try:
180 os.makedirs(path) 181 os.makedirs(path)
181 except OSError as e: 182 except OSError as e:
182 if e.errno != errno.EEXIST: 183 if e.errno != errno.EEXIST:
183 raise 184 raise
184 185
185 186
186 def _run_self_test(): 187 def _run_self_test():
187 bucket = 'chromium-skia-gm' 188 bucket = 'chromium-skia-gm'
188 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) 189 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint)
189 subdir = 'subdir' 190 subdir = 'subdir'
190 filenames_to_upload = ['file1', 'file2'] 191 filenames_to_upload = ['file1', 'file2']
192 gs = GSUtils()
191 193
192 # Upload test files to Google Storage. 194 # Upload test files to Google Storage.
193 local_src_dir = tempfile.mkdtemp() 195 local_src_dir = tempfile.mkdtemp()
194 os.mkdir(os.path.join(local_src_dir, subdir)) 196 os.mkdir(os.path.join(local_src_dir, subdir))
195 try: 197 try:
196 for filename in filenames_to_upload: 198 for filename in filenames_to_upload:
197 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: 199 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f:
198 f.write('contents of %s\n' % filename) 200 f.write('contents of %s\n' % filename)
199 upload_file(source_path=os.path.join(local_src_dir, subdir, filename), 201 gs.upload_file(source_path=os.path.join(local_src_dir, subdir, filename),
200 dest_bucket=bucket, 202 dest_bucket=bucket,
201 dest_path=posixpath.join(remote_dir, subdir, filename)) 203 dest_path=posixpath.join(remote_dir, subdir, filename))
202 finally: 204 finally:
203 shutil.rmtree(local_src_dir) 205 shutil.rmtree(local_src_dir)
204 206
205 # Get a list of the files we uploaded to Google Storage. 207 # Get a list of the files we uploaded to Google Storage.
206 (dirs, files) = list_bucket_contents( 208 (dirs, files) = gs.list_bucket_contents(
207 bucket=bucket, subdir=remote_dir) 209 bucket=bucket, subdir=remote_dir)
208 assert dirs == [subdir] 210 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir)
epoger 2014/07/14 15:36:42 added more info when asserts fire
209 assert files == [] 211 assert files == [], '%s == []' % files
210 (dirs, files) = list_bucket_contents( 212 (dirs, files) = gs.list_bucket_contents(
211 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) 213 bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
212 assert dirs == [] 214 assert dirs == [], '%s == []' % dirs
213 assert files == filenames_to_upload 215 assert files == filenames_to_upload, '%s == %s' % (files, filenames_to_upload)
214 216
215 # Download the files we uploaded to Google Storage, and validate contents. 217 # Download the files we uploaded to Google Storage, and validate contents.
216 local_dest_dir = tempfile.mkdtemp() 218 local_dest_dir = tempfile.mkdtemp()
217 try: 219 try:
218 for filename in filenames_to_upload: 220 for filename in filenames_to_upload:
219 download_file(source_bucket=bucket, 221 gs.download_file(source_bucket=bucket,
220 source_path=posixpath.join(remote_dir, subdir, filename), 222 source_path=posixpath.join(remote_dir, subdir, filename),
221 dest_path=os.path.join(local_dest_dir, subdir, filename), 223 dest_path=os.path.join(local_dest_dir, subdir, filename),
222 create_subdirs_if_needed=True) 224 create_subdirs_if_needed=True)
223 with open(os.path.join(local_dest_dir, subdir, filename)) as f: 225 with open(os.path.join(local_dest_dir, subdir, filename)) as f:
224 file_contents = f.read() 226 file_contents = f.read()
225 assert file_contents == 'contents of %s\n' % filename 227 assert file_contents == 'contents of %s\n' % filename, (
228 '%s == "contents of %s\n"' % (file_contents, filename))
226 finally: 229 finally:
227 shutil.rmtree(local_dest_dir) 230 shutil.rmtree(local_dest_dir)
228 231
229 # Delete all the files we uploaded to Google Storage. 232 # Delete all the files we uploaded to Google Storage.
230 for filename in filenames_to_upload: 233 for filename in filenames_to_upload:
231 delete_file(bucket=bucket, 234 gs.delete_file(bucket=bucket,
232 path=posixpath.join(remote_dir, subdir, filename)) 235 path=posixpath.join(remote_dir, subdir, filename))
233 236
234 # Confirm that we deleted all the files we uploaded to Google Storage. 237 # Confirm that we deleted all the files we uploaded to Google Storage.
235 (dirs, files) = list_bucket_contents( 238 (dirs, files) = gs.list_bucket_contents(
236 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) 239 bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
237 assert dirs == [] 240 assert dirs == [], '%s == []' % dirs
238 assert files == [] 241 assert files == [], '%s == []' % files
239 242
240 243
241 # TODO(epoger): How should we exercise this self-test? 244 # TODO(epoger): How should we exercise this self-test?
242 # I avoided using the standard unittest framework, because these Google Storage 245 # I avoided using the standard unittest framework, because these Google Storage
243 # operations are expensive and require .boto permissions. 246 # operations are expensive and require .boto permissions.
244 # 247 #
245 # How can we automatically test this code without wasting too many resources 248 # How can we automatically test this code without wasting too many resources
246 # or needing .boto permissions? 249 # or needing .boto permissions?
247 if __name__ == '__main__': 250 if __name__ == '__main__':
248 _run_self_test() 251 _run_self_test()
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698