| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 | 2 |
| 3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
| 4 """ | 4 """ |
| 5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
| 6 | 6 |
| 7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
| 8 found in the LICENSE file. | 8 found in the LICENSE file. |
| 9 | 9 |
| 10 Utilities for accessing Google Cloud Storage, using the boto library. | 10 Utilities for accessing Google Cloud Storage, using the boto library. |
# (22 lines elided by the diff viewer here: module docstring tail and
#  stdlib imports — errno, os, posixpath, random, re, shutil, sys, tempfile —
#  plus computation of import_dirpath for the bundled boto checkout.)
| 33 if import_dirpath not in sys.path: | 33 if import_dirpath not in sys.path: |
| 34 # We need to insert at the beginning of the path, to make sure that our | 34 # We need to insert at the beginning of the path, to make sure that our |
| 35 # imported versions are favored over others that might be in the path. | 35 # imported versions are favored over others that might be in the path. |
| 36 sys.path.insert(0, import_dirpath) | 36 sys.path.insert(0, import_dirpath) |
| 37 from boto.gs.connection import GSConnection | 37 from boto.gs.connection import GSConnection |
| 38 from boto.gs.key import Key | 38 from boto.gs.key import Key |
| 39 from boto.s3.bucketlistresultset import BucketListResultSet | 39 from boto.s3.bucketlistresultset import BucketListResultSet |
| 40 from boto.s3.prefix import Prefix | 40 from boto.s3.prefix import Prefix |
| 41 | 41 |
| 42 | 42 |
class GSUtils(object):
  """Utilities for accessing Google Cloud Storage, using the boto library."""

  def __init__(self, boto_file_path=os.path.join('~', '.boto')):
    """Constructor.

    Reads Google Storage credentials once here, so that each operation does
    not need to re-read the .boto file.

    Params:
      boto_file_path: full path (local-OS-style) on local disk where .boto
          credentials file can be found.  An exception is thrown if this file
          is missing.
          TODO(epoger): Change missing-file behavior: allow the caller to
          operate on public files in Google Storage.
    """
    boto_file_path = os.path.expanduser(boto_file_path)
    # Parenthesized print works identically under Python 2 and Python 3.
    print('Reading boto file from %s' % boto_file_path)
    boto_dict = _config_file_as_dict(filepath=boto_file_path)
    self._gs_access_key_id = boto_dict['gs_access_key_id']
    self._gs_secret_access_key = boto_dict['gs_secret_access_key']

  def delete_file(self, bucket, path):
    """Delete a single file within a GS bucket.

    TODO(epoger): what if bucket or path does not exist?  Should probably
    raise an exception.  Implement, and add a test to exercise this.

    Params:
      bucket: GS bucket to delete a file from
      path: full path (Posix-style) of the file within the bucket to delete
    """
    conn = self._create_connection()
    b = conn.get_bucket(bucket_name=bucket)
    item = Key(b)
    item.key = path
    item.delete()

  def upload_file(self, source_path, dest_bucket, dest_path):
    """Upload contents of a local file to Google Storage.

    TODO(epoger): Add the extra parameters provided by upload_file() within
    https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py ,
    so we can replace that function with this one.

    Params:
      source_path: full path (local-OS-style) on local disk to read from
      dest_bucket: GCS bucket to copy the file to
      dest_path: full path (Posix-style) within that bucket
    """
    conn = self._create_connection()
    b = conn.get_bucket(bucket_name=dest_bucket)
    item = Key(b)
    item.key = dest_path
    item.set_contents_from_filename(filename=source_path)

  def download_file(self, source_bucket, source_path, dest_path,
                    create_subdirs_if_needed=False):
    """Downloads a single file from Google Cloud Storage to local disk.

    Args:
      source_bucket: GCS bucket to download the file from
      source_path: full path (Posix-style) within that bucket
      dest_path: full path (local-OS-style) on local disk to copy the file to
      create_subdirs_if_needed: boolean; whether to create subdirectories as
          needed to create dest_path
    """
    conn = self._create_connection()
    b = conn.get_bucket(bucket_name=source_bucket)
    item = Key(b)
    item.key = source_path
    if create_subdirs_if_needed:
      _makedirs_if_needed(os.path.dirname(dest_path))
    # Binary mode: the remote object may hold binary data, and text mode
    # would corrupt it via newline translation on some platforms.
    with open(dest_path, 'wb') as f:
      item.get_contents_to_file(fp=f)

  def list_bucket_contents(self, bucket, subdir=None):
    """Returns files in the Google Storage bucket as a (dirs, files) tuple.

    Args:
      bucket: name of the Google Storage bucket
      subdir: directory within the bucket to list, or None for root directory

    Returns: a (dirs, files) tuple of lists, with entries relative to subdir
    and subdirectory names stripped of their trailing slash.
    """
    # The GS command relies on the prefix (if any) ending with a slash.
    prefix = subdir or ''
    if prefix and not prefix.endswith('/'):
      prefix += '/'
    prefix_length = len(prefix)

    conn = self._create_connection()
    b = conn.get_bucket(bucket_name=bucket)
    lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
    dirs = []
    files = []
    for item in lister:
      t = type(item)
      if t is Key:
        files.append(item.key[prefix_length:])
      elif t is Prefix:
        # Drop the trailing slash the delimiter listing leaves on dir names.
        dirs.append(item.name[prefix_length:-1])
    return (dirs, files)

  def _create_connection(self):
    """Returns a GSConnection object we can use to access Google Storage."""
    return GSConnection(
        gs_access_key_id=self._gs_access_key_id,
        gs_secret_access_key=self._gs_secret_access_key)
| 125 | 147 |
| 126 | 148 |
def _config_file_as_dict(filepath):
  """Reads a boto-style config file into a dict.

  Parses all lines from the file of this form: key = value
  TODO(epoger): Create unittest.

  Params:
    filepath: path to config file on local disk

  Returns: contents of the config file, as a dictionary

  Raises exception if file not found.
  """
  dic = {}
  # Raw string, so the backslashes are regex metacharacters rather than
  # (invalid in modern Python) string escapes.
  line_regex = re.compile(r'^\s*(\S+)\s*=\s*(\S+)\s*$')
  with open(filepath) as f:
    for line in f:
      match = line_regex.match(line)
      if match:
        (key, value) = match.groups()
        dic[key] = value
  return dic
def _makedirs_if_needed(path):
  """Create directory `path`, including any missing parent directories.

  A directory that already exists is not an error; any other failure from
  os.makedirs() propagates to the caller.

  Args:
    path: full path of directory to create
  """
  try:
    os.makedirs(path)
  except OSError as exc:
    # Already-existing directory is fine; re-raise everything else.
    if exc.errno == errno.EEXIST:
      return
    raise
| 184 | 185 |
| 185 | 186 |
def _run_self_test():
  """Exercise GSUtils against a live GS bucket: upload, list, download, and
  delete a few small files, asserting the results at each step.

  Requires .boto credentials and network access; see the TODO below this
  function about making this cheaper to run automatically.
  """
  bucket = 'chromium-skia-gm'
  # Random remote dir so concurrent self-test runs don't collide.
  # sys.maxsize (available since Python 2.6) equals sys.maxint on CPython 2
  # and, unlike maxint, also exists on Python 3.
  remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxsize)
  subdir = 'subdir'
  filenames_to_upload = ['file1', 'file2']
  gs = GSUtils()

  # Upload test files to Google Storage.
  local_src_dir = tempfile.mkdtemp()
  os.mkdir(os.path.join(local_src_dir, subdir))
  try:
    for filename in filenames_to_upload:
      with open(os.path.join(local_src_dir, subdir, filename), 'w') as f:
        f.write('contents of %s\n' % filename)
      gs.upload_file(source_path=os.path.join(local_src_dir, subdir, filename),
                     dest_bucket=bucket,
                     dest_path=posixpath.join(remote_dir, subdir, filename))
  finally:
    shutil.rmtree(local_src_dir)

  # Get a list of the files we uploaded to Google Storage.
  (dirs, files) = gs.list_bucket_contents(
      bucket=bucket, subdir=remote_dir)
  assert dirs == [subdir], '%s == [%s]' % (dirs, subdir)
  assert files == [], '%s == []' % files
  (dirs, files) = gs.list_bucket_contents(
      bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
  assert dirs == [], '%s == []' % dirs
  assert files == filenames_to_upload, '%s == %s' % (files, filenames_to_upload)

  # Download the files we uploaded to Google Storage, and validate contents.
  local_dest_dir = tempfile.mkdtemp()
  try:
    for filename in filenames_to_upload:
      gs.download_file(source_bucket=bucket,
                       source_path=posixpath.join(remote_dir, subdir, filename),
                       dest_path=os.path.join(local_dest_dir, subdir, filename),
                       create_subdirs_if_needed=True)
      with open(os.path.join(local_dest_dir, subdir, filename)) as f:
        file_contents = f.read()
      assert file_contents == 'contents of %s\n' % filename, (
          '%s == "contents of %s\n"' % (file_contents, filename))
  finally:
    shutil.rmtree(local_dest_dir)

  # Delete all the files we uploaded to Google Storage.
  for filename in filenames_to_upload:
    gs.delete_file(bucket=bucket,
                   path=posixpath.join(remote_dir, subdir, filename))

  # Confirm that we deleted all the files we uploaded to Google Storage.
  (dirs, files) = gs.list_bucket_contents(
      bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
  assert dirs == [], '%s == []' % dirs
  assert files == [], '%s == []' % files
| 239 | 242 |
| 240 | 243 |
# TODO(epoger): How should we exercise this self-test?
# I avoided using the standard unittest framework, because these Google Storage
# operations are expensive and require .boto permissions.
#
# How can we automatically test this code without wasting too many resources
# or needing .boto permissions?
if __name__ == '__main__':
  _run_self_test()