OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
4 """ | 4 """ |
5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
6 | 6 |
7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
8 found in the LICENSE file. | 8 found in the LICENSE file. |
9 | 9 |
10 Utilities for accessing Google Cloud Storage, using the boto library. | 10 Utilities for accessing Google Cloud Storage, using the boto library. |
(...skipping 22 matching lines...) Expand all Loading... |
33 if import_dirpath not in sys.path: | 33 if import_dirpath not in sys.path: |
34 # We need to insert at the beginning of the path, to make sure that our | 34 # We need to insert at the beginning of the path, to make sure that our |
35 # imported versions are favored over others that might be in the path. | 35 # imported versions are favored over others that might be in the path. |
36 sys.path.insert(0, import_dirpath) | 36 sys.path.insert(0, import_dirpath) |
37 from boto.gs.connection import GSConnection | 37 from boto.gs.connection import GSConnection |
38 from boto.gs.key import Key | 38 from boto.gs.key import Key |
39 from boto.s3.bucketlistresultset import BucketListResultSet | 39 from boto.s3.bucketlistresultset import BucketListResultSet |
40 from boto.s3.prefix import Prefix | 40 from boto.s3.prefix import Prefix |
41 | 41 |
42 | 42 |
43 def delete_file(bucket, path): | 43 class GSUtils(object): |
44 """Delete a single file within a GS bucket. | 44 """Utilities for accessing Google Cloud Storage, using the boto library.""" |
45 | 45 |
46 TODO(epoger): what if bucket or path does not exist? Should probably raise | 46 def __init__(self, boto_file_path=os.path.join('~','.boto')): |
47 an exception. Implement, and add a test to exercise this. | 47 """Constructor. |
48 | 48 |
49 Params: | 49 Params: |
50 bucket: GS bucket to delete a file from | 50 boto_file_path: full path (local-OS-style) on local disk where .boto |
51 path: full path (Posix-style) of the file within the bucket to delete | 51 credentials file can be found. An exception is thrown if this file |
52 """ | 52 is missing. |
53 conn = _create_connection() | 53 TODO(epoger): Change missing-file behavior: allow the caller to |
54 b = conn.get_bucket(bucket_name=bucket) | 54 operate on public files in Google Storage. |
55 item = Key(b) | 55 """ |
56 item.key = path | 56 boto_file_path = os.path.expanduser(boto_file_path) |
57 item.delete() | 57 print 'Reading boto file from %s' % boto_file_path |
| 58 boto_dict = _config_file_as_dict(filepath=boto_file_path) |
| 59 self._gs_access_key_id = boto_dict['gs_access_key_id'] |
| 60 self._gs_secret_access_key = boto_dict['gs_secret_access_key'] |
58 | 61 |
| 62 def delete_file(self, bucket, path): |
| 63 """Delete a single file within a GS bucket. |
59 | 64 |
60 def upload_file(source_path, dest_bucket, dest_path): | 65 TODO(epoger): what if bucket or path does not exist? Should probably raise |
61 """Upload contents of a local file to Google Storage. | 66 an exception. Implement, and add a test to exercise this. |
62 | 67 |
63 TODO(epoger): Add the extra parameters provided by upload_file() within | 68 Params: |
64 https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/u
tils/old_gs_utils.py , | 69 bucket: GS bucket to delete a file from |
65 so we can replace that function with this one. | 70 path: full path (Posix-style) of the file within the bucket to delete |
| 71 """ |
| 72 conn = self._create_connection() |
| 73 b = conn.get_bucket(bucket_name=bucket) |
| 74 item = Key(b) |
| 75 item.key = path |
| 76 item.delete() |
66 | 77 |
67 params: | 78 def upload_file(self, source_path, dest_bucket, dest_path): |
68 source_path: full path (local-OS-style) on local disk to read from | 79 """Upload contents of a local file to Google Storage. |
69 dest_bucket: GCS bucket to copy the file to | |
70 dest_path: full path (Posix-style) within that bucket | |
71 """ | |
72 conn = _create_connection() | |
73 b = conn.get_bucket(bucket_name=dest_bucket) | |
74 item = Key(b) | |
75 item.key = dest_path | |
76 item.set_contents_from_filename(filename=source_path) | |
77 | 80 |
| 81 TODO(epoger): Add the extra parameters provided by upload_file() within |
| 82 https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts
/utils/old_gs_utils.py , |
| 83 so we can replace that function with this one. |
78 | 84 |
79 def download_file(source_bucket, source_path, dest_path, | 85 params: |
80 create_subdirs_if_needed=False): | 86 source_path: full path (local-OS-style) on local disk to read from |
81 """ Downloads a single file from Google Cloud Storage to local disk. | 87 dest_bucket: GCS bucket to copy the file to |
| 88 dest_path: full path (Posix-style) within that bucket |
| 89 """ |
| 90 conn = self._create_connection() |
| 91 b = conn.get_bucket(bucket_name=dest_bucket) |
| 92 item = Key(b) |
| 93 item.key = dest_path |
| 94 item.set_contents_from_filename(filename=source_path) |
82 | 95 |
83 Args: | 96 def download_file(self, source_bucket, source_path, dest_path, |
84 source_bucket: GCS bucket to download the file from | 97 create_subdirs_if_needed=False): |
85 source_path: full path (Posix-style) within that bucket | 98 """Downloads a single file from Google Cloud Storage to local disk. |
86 dest_path: full path (local-OS-style) on local disk to copy the file to | |
87 create_subdirs_if_needed: boolean; whether to create subdirectories as | |
88 needed to create dest_path | |
89 """ | |
90 conn = _create_connection() | |
91 b = conn.get_bucket(bucket_name=source_bucket) | |
92 item = Key(b) | |
93 item.key = source_path | |
94 if create_subdirs_if_needed: | |
95 _makedirs_if_needed(os.path.dirname(dest_path)) | |
96 with open(dest_path, 'w') as f: | |
97 item.get_contents_to_file(fp=f) | |
98 | 99 |
| 100 Args: |
| 101 source_bucket: GCS bucket to download the file from |
| 102 source_path: full path (Posix-style) within that bucket |
| 103 dest_path: full path (local-OS-style) on local disk to copy the file to |
| 104 create_subdirs_if_needed: boolean; whether to create subdirectories as |
| 105 needed to create dest_path |
| 106 """ |
| 107 conn = self._create_connection() |
| 108 b = conn.get_bucket(bucket_name=source_bucket) |
| 109 item = Key(b) |
| 110 item.key = source_path |
| 111 if create_subdirs_if_needed: |
| 112 _makedirs_if_needed(os.path.dirname(dest_path)) |
| 113 with open(dest_path, 'w') as f: |
| 114 item.get_contents_to_file(fp=f) |
99 | 115 |
100 def list_bucket_contents(bucket, subdir=None): | 116 def list_bucket_contents(self, bucket, subdir=None): |
101 """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple. | 117 """Returns files in the Google Storage bucket as a (dirs, files) tuple. |
102 | 118 |
103 Args: | 119 Args: |
104 bucket: name of the Google Storage bucket | 120 bucket: name of the Google Storage bucket |
105 subdir: directory within the bucket to list, or None for root directory | 121 subdir: directory within the bucket to list, or None for root directory |
106 """ | 122 """ |
107 # The GS command relies on the prefix (if any) ending with a slash. | 123 # The GS command relies on the prefix (if any) ending with a slash. |
108 prefix = subdir or '' | 124 prefix = subdir or '' |
109 if prefix and not prefix.endswith('/'): | 125 if prefix and not prefix.endswith('/'): |
110 prefix += '/' | 126 prefix += '/' |
111 prefix_length = len(prefix) if prefix else 0 | 127 prefix_length = len(prefix) if prefix else 0 |
112 | 128 |
113 conn = _create_connection() | 129 conn = self._create_connection() |
114 b = conn.get_bucket(bucket_name=bucket) | 130 b = conn.get_bucket(bucket_name=bucket) |
115 lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') | 131 lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') |
116 dirs = [] | 132 dirs = [] |
117 files = [] | 133 files = [] |
118 for item in lister: | 134 for item in lister: |
119 t = type(item) | 135 t = type(item) |
120 if t is Key: | 136 if t is Key: |
121 files.append(item.key[prefix_length:]) | 137 files.append(item.key[prefix_length:]) |
122 elif t is Prefix: | 138 elif t is Prefix: |
123 dirs.append(item.name[prefix_length:-1]) | 139 dirs.append(item.name[prefix_length:-1]) |
124 return (dirs, files) | 140 return (dirs, files) |
| 141 |
| 142 def _create_connection(self): |
| 143 """Returns a GSConnection object we can use to access Google Storage.""" |
| 144 return GSConnection( |
| 145 gs_access_key_id=self._gs_access_key_id, |
| 146 gs_secret_access_key=self._gs_secret_access_key) |
125 | 147 |
126 | 148 |
127 def _config_file_as_dict(filepath): | 149 def _config_file_as_dict(filepath): |
128 """Reads a boto-style config file into a dict. | 150 """Reads a boto-style config file into a dict. |
129 | 151 |
130 Parses all lines from the file of this form: key = value | 152 Parses all lines from the file of this form: key = value |
131 TODO(epoger): Create unittest. | 153 TODO(epoger): Create unittest. |
132 | 154 |
133 Params: | 155 Params: |
134 filepath: path to config file on local disk | 156 filepath: path to config file on local disk |
135 | 157 |
136 Returns: contents of the config file, as a dictionary | 158 Returns: contents of the config file, as a dictionary |
137 | 159 |
138 Raises exception if file not found. | 160 Raises exception if file not found. |
139 """ | 161 """ |
140 dic = {} | 162 dic = {} |
141 line_regex = re.compile('^\s*(\S+)\s*=\s*(\S+)\s*$') | 163 line_regex = re.compile('^\s*(\S+)\s*=\s*(\S+)\s*$') |
142 with open(filepath) as f: | 164 with open(filepath) as f: |
143 for line in f: | 165 for line in f: |
144 match = line_regex.match(line) | 166 match = line_regex.match(line) |
145 if match: | 167 if match: |
146 (key, value) = match.groups() | 168 (key, value) = match.groups() |
147 dic[key] = value | 169 dic[key] = value |
148 return dic | 170 return dic |
149 | 171 |
150 | 172 |
151 def _create_connection(boto_file_path=os.path.join('~','.boto')): | |
152 """Returns a GSConnection object we can use to access Google Storage. | |
153 | |
154 Params: | |
155 boto_file_path: full path (local-OS-style) on local disk where .boto | |
156 credentials file can be found | |
157 | |
158 TODO(epoger): Change this module to be object-based, where __init__() reads | |
159 the boto file into boto_dict once instead of repeatedly for each operation. | |
160 | |
161 TODO(epoger): if the file does not exist, rather than raising an exception, | |
162 create a GSConnection that can operate on public files. | |
163 """ | |
164 boto_file_path = os.path.expanduser(boto_file_path) | |
165 print 'Reading boto file from %s' % boto_file_path | |
166 boto_dict = _config_file_as_dict(filepath=boto_file_path) | |
167 return GSConnection( | |
168 gs_access_key_id=boto_dict['gs_access_key_id'], | |
169 gs_secret_access_key=boto_dict['gs_secret_access_key']) | |
170 | |
171 | |
172 def _makedirs_if_needed(path): | 173 def _makedirs_if_needed(path): |
173 """ Creates a directory (and any parent directories needed), if it does not | 174 """Creates a directory (and any parent directories needed), if it does not |
174 exist yet. | 175 exist yet. |
175 | 176 |
176 Args: | 177 Args: |
177 path: full path of directory to create | 178 path: full path of directory to create |
178 """ | 179 """ |
179 try: | 180 try: |
180 os.makedirs(path) | 181 os.makedirs(path) |
181 except OSError as e: | 182 except OSError as e: |
182 if e.errno != errno.EEXIST: | 183 if e.errno != errno.EEXIST: |
183 raise | 184 raise |
184 | 185 |
185 | 186 |
186 def _run_self_test(): | 187 def _run_self_test(): |
187 bucket = 'chromium-skia-gm' | 188 bucket = 'chromium-skia-gm' |
188 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) | 189 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) |
189 subdir = 'subdir' | 190 subdir = 'subdir' |
190 filenames_to_upload = ['file1', 'file2'] | 191 filenames_to_upload = ['file1', 'file2'] |
| 192 gs = GSUtils() |
191 | 193 |
192 # Upload test files to Google Storage. | 194 # Upload test files to Google Storage. |
193 local_src_dir = tempfile.mkdtemp() | 195 local_src_dir = tempfile.mkdtemp() |
194 os.mkdir(os.path.join(local_src_dir, subdir)) | 196 os.mkdir(os.path.join(local_src_dir, subdir)) |
195 try: | 197 try: |
196 for filename in filenames_to_upload: | 198 for filename in filenames_to_upload: |
197 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: | 199 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: |
198 f.write('contents of %s\n' % filename) | 200 f.write('contents of %s\n' % filename) |
199 upload_file(source_path=os.path.join(local_src_dir, subdir, filename), | 201 gs.upload_file(source_path=os.path.join(local_src_dir, subdir, filename), |
200 dest_bucket=bucket, | 202 dest_bucket=bucket, |
201 dest_path=posixpath.join(remote_dir, subdir, filename)) | 203 dest_path=posixpath.join(remote_dir, subdir, filename)) |
202 finally: | 204 finally: |
203 shutil.rmtree(local_src_dir) | 205 shutil.rmtree(local_src_dir) |
204 | 206 |
205 # Get a list of the files we uploaded to Google Storage. | 207 # Get a list of the files we uploaded to Google Storage. |
206 (dirs, files) = list_bucket_contents( | 208 (dirs, files) = gs.list_bucket_contents( |
207 bucket=bucket, subdir=remote_dir) | 209 bucket=bucket, subdir=remote_dir) |
208 assert dirs == [subdir] | 210 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir) |
209 assert files == [] | 211 assert files == [], '%s == []' % files |
210 (dirs, files) = list_bucket_contents( | 212 (dirs, files) = gs.list_bucket_contents( |
211 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) | 213 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |
212 assert dirs == [] | 214 assert dirs == [], '%s == []' % dirs |
213 assert files == filenames_to_upload | 215 assert files == filenames_to_upload, '%s == %s' % (files, filenames_to_upload) |
214 | 216 |
215 # Download the files we uploaded to Google Storage, and validate contents. | 217 # Download the files we uploaded to Google Storage, and validate contents. |
216 local_dest_dir = tempfile.mkdtemp() | 218 local_dest_dir = tempfile.mkdtemp() |
217 try: | 219 try: |
218 for filename in filenames_to_upload: | 220 for filename in filenames_to_upload: |
219 download_file(source_bucket=bucket, | 221 gs.download_file(source_bucket=bucket, |
220 source_path=posixpath.join(remote_dir, subdir, filename), | 222 source_path=posixpath.join(remote_dir, subdir, filename), |
221 dest_path=os.path.join(local_dest_dir, subdir, filename), | 223 dest_path=os.path.join(local_dest_dir, subdir, filename), |
222 create_subdirs_if_needed=True) | 224 create_subdirs_if_needed=True) |
223 with open(os.path.join(local_dest_dir, subdir, filename)) as f: | 225 with open(os.path.join(local_dest_dir, subdir, filename)) as f: |
224 file_contents = f.read() | 226 file_contents = f.read() |
225 assert file_contents == 'contents of %s\n' % filename | 227 assert file_contents == 'contents of %s\n' % filename, ( |
| 228 '%s == "contents of %s\n"' % (file_contents, filename)) |
226 finally: | 229 finally: |
227 shutil.rmtree(local_dest_dir) | 230 shutil.rmtree(local_dest_dir) |
228 | 231 |
229 # Delete all the files we uploaded to Google Storage. | 232 # Delete all the files we uploaded to Google Storage. |
230 for filename in filenames_to_upload: | 233 for filename in filenames_to_upload: |
231 delete_file(bucket=bucket, | 234 gs.delete_file(bucket=bucket, |
232 path=posixpath.join(remote_dir, subdir, filename)) | 235 path=posixpath.join(remote_dir, subdir, filename)) |
233 | 236 |
234 # Confirm that we deleted all the files we uploaded to Google Storage. | 237 # Confirm that we deleted all the files we uploaded to Google Storage. |
235 (dirs, files) = list_bucket_contents( | 238 (dirs, files) = gs.list_bucket_contents( |
236 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) | 239 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |
237 assert dirs == [] | 240 assert dirs == [], '%s == []' % dirs |
238 assert files == [] | 241 assert files == [], '%s == []' % files |
239 | 242 |
240 | 243 |
241 # TODO(epoger): How should we exercise this self-test? | 244 # TODO(epoger): How should we exercise this self-test? |
242 # I avoided using the standard unittest framework, because these Google Storage | 245 # I avoided using the standard unittest framework, because these Google Storage |
243 # operations are expensive and require .boto permissions. | 246 # operations are expensive and require .boto permissions. |
244 # | 247 # |
245 # How can we automatically test this code without wasting too many resources | 248 # How can we automatically test this code without wasting too many resources |
246 # or needing .boto permissions? | 249 # or needing .boto permissions? |
247 if __name__ == '__main__': | 250 if __name__ == '__main__': |
248 _run_self_test() | 251 _run_self_test() |
OLD | NEW |