| OLD | NEW |
| (Empty) |
| 1 # Copyright 2014 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 """Wrappers for gsutil, for basic interaction with Google Cloud Storage.""" | |
| 6 | |
| 7 import collections | |
| 8 import contextlib | |
| 9 import hashlib | |
| 10 import logging | |
| 11 import os | |
| 12 import shutil | |
| 13 import stat | |
| 14 import subprocess | |
| 15 import sys | |
| 16 import tempfile | |
| 17 import time | |
| 18 | |
| 19 try: | |
| 20 import fcntl | |
| 21 except ImportError: | |
| 22 fcntl = None | |
| 23 | |
| 24 from catapult_base import util | |
| 25 | |
| 26 | |
# Well-known Google Cloud Storage bucket names used by Telemetry.
PUBLIC_BUCKET = 'chromium-telemetry'
PARTNER_BUCKET = 'chrome-partner-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'
TELEMETRY_OUTPUT = 'chrome-telemetry-output'

# Uses ordered dict to make sure that bucket's key-value items are ordered from
# the most open to the most restrictive.
BUCKET_ALIASES = collections.OrderedDict((
    ('public', PUBLIC_BUCKET),
    ('partner', PARTNER_BUCKET),
    ('internal', INTERNAL_BUCKET),
    ('output', TELEMETRY_OUTPUT),
))

# Short alias names, in the same most-open-first order as BUCKET_ALIASES.
BUCKET_ALIAS_NAMES = BUCKET_ALIASES.keys()


# Path to the gsutil binary bundled in the catapult checkout's third_party.
_GSUTIL_PATH = os.path.join(util.GetCatapultDir(), 'third_party', 'gsutil',
                            'gsutil')

# TODO(tbarzic): A workaround for http://crbug.com/386416 and
# http://crbug.com/359293. See |_RunCommand|.
_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'
| 50 | |
| 51 | |
| 52 | |
class CloudStorageError(Exception):
  """Base exception for failures while talking to Google Cloud Storage."""

  @staticmethod
  def _GetConfigInstructions():
    """Returns human-readable steps for configuring gsutil credentials."""
    gsutil_command = _GSUTIL_PATH
    if util.IsRunningOnCrosDevice():
      # On a CrOS device gsutil needs a writable HOME (see |_RunCommand|).
      gsutil_command = 'HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, _GSUTIL_PATH)
    return ('To configure your credentials:\n'
            ' 1. Run "%s config" and follow its instructions.\n'
            ' 2. If you have a @google.com account, use that account.\n'
            ' 3. For the project-id, just enter 0.' % gsutil_command)
| 63 | |
| 64 | |
class PermissionError(CloudStorageError):
  """Raised when the caller lacks permission for the requested object.

  NOTE: within this module the name shadows Python 3's built-in
  PermissionError.
  """

  def __init__(self):
    message = ('Attempted to access a file from Cloud Storage but you don\'t '
               'have permission. ' + self._GetConfigInstructions())
    super(PermissionError, self).__init__(message)
| 70 | |
| 71 | |
class CredentialsError(CloudStorageError):
  """Raised when gsutil has no configured credentials at all."""

  def __init__(self):
    message = ('Attempted to access a file from Cloud Storage but you have no '
               'configured credentials. ' + self._GetConfigInstructions())
    super(CredentialsError, self).__init__(message)
| 77 | |
| 78 | |
class NotFoundError(CloudStorageError):
  """Raised when the requested bucket or object does not exist."""
  pass
| 81 | |
| 82 | |
class ServerError(CloudStorageError):
  """Raised on a Cloud Storage 500 Internal Server Error (retryable)."""
  pass
| 85 | |
| 86 | |
# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
  """Searches |extra_search_paths|, then $PATH, for an executable.

  Returns the first path found that is executable, or None if there is none.
  """
  candidate_dirs = list(extra_search_paths)
  candidate_dirs += os.environ['PATH'].split(os.pathsep)
  for candidate_dir in candidate_dirs:
    candidate = os.path.join(candidate_dir, relative_executable_path)
    if util.IsExecutable(candidate):
      return candidate
  return None
| 95 | |
| 96 def _EnsureExecutable(gsutil): | |
| 97 """chmod +x if gsutil is not executable.""" | |
| 98 st = os.stat(gsutil) | |
| 99 if not st.st_mode & stat.S_IEXEC: | |
| 100 os.chmod(gsutil, st.st_mode | stat.S_IEXEC) | |
| 101 | |
def _RunCommand(args):
  """Runs gsutil with |args| and returns its stdout.

  Raises:
    A CloudStorageError subclass, classified from gsutil's stderr, when the
    command exits with a non-zero status.
  """
  # On cros device, as telemetry is running as root, home will be set to
  # /root/, which is not writable. gsutil will attempt to create a download
  # tracker dir in home dir and fail. To avoid this, override HOME dir to
  # something writable when running on cros device.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  # http://crbug.com/386416, http://crbug.com/359293.
  env = None
  if util.IsRunningOnCrosDevice():
    env = os.environ.copy()
    env['HOME'] = _CROS_GSUTIL_HOME_WAR

  if os.name == 'nt':
    # If Windows, prepend python. Python scripts aren't directly executable.
    command = [sys.executable, _GSUTIL_PATH] + args
  else:
    # Don't do it on POSIX, in case someone is using a shell script to
    # redirect.
    command = [_GSUTIL_PATH] + args
    _EnsureExecutable(_GSUTIL_PATH)

  process = subprocess.Popen(command, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, env=env)
  stdout, stderr = process.communicate()

  if process.returncode:
    _RaiseCloudStorageError(stderr)

  return stdout


def _RaiseCloudStorageError(stderr):
  """Raises the CloudStorageError subclass matching gsutil's stderr text.

  The match order matters: credential problems are recognized before
  permission problems, which are recognized before missing objects.
  """
  if stderr.startswith((
      'You are attempting to access protected data with no configured',
      'Failure: No handler was ready to authenticate.')):
    raise CredentialsError()
  if ('status=403' in stderr or 'status 403' in stderr or
      '403 Forbidden' in stderr):
    raise PermissionError()
  if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
      'No URLs matched' in stderr or 'One or more URLs matched no' in stderr):
    raise NotFoundError(stderr)
  if '500 Internal Server Error' in stderr:
    raise ServerError(stderr)
  raise CloudStorageError(stderr)
| 143 | |
| 144 | |
def List(bucket):
  """Returns the names of the objects listed at the top level of |bucket|."""
  prefix = 'gs://%s/' % bucket
  listing = _RunCommand(['ls', prefix])
  # gsutil prints full gs:// URLs; strip the bucket prefix from each line.
  return [line[len(prefix):] for line in listing.splitlines()]
| 149 | |
| 150 | |
def Exists(bucket, remote_path):
  """Returns True iff gs://|bucket|/|remote_path| exists."""
  try:
    _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
  except NotFoundError:
    return False
  return True
| 157 | |
| 158 | |
def Move(bucket1, bucket2, remote_path):
  """Moves gs://|bucket1|/|remote_path| to gs://|bucket2|/|remote_path|.

  Raises:
    CloudStorageError (or a subclass) if the underlying gsutil call fails.
  """
  url1 = 'gs://%s/%s' % (bucket1, remote_path)
  url2 = 'gs://%s/%s' % (bucket2, remote_path)
  # Pass lazy %-args to logging instead of pre-formatting the message.
  logging.info('Moving %s to %s', url1, url2)
  _RunCommand(['mv', url1, url2])
| 164 | |
| 165 | |
def Copy(bucket_from, bucket_to, remote_path_from, remote_path_to):
  """Copy a file from one location in CloudStorage to another.

  Args:
    bucket_from: The cloud storage bucket where the file is currently located.
    bucket_to: The cloud storage bucket it is being copied to.
    remote_path_from: The file path where the file is located in bucket_from.
    remote_path_to: The file path it is being copied to in bucket_to.

  It should: cause no changes locally or to the starting file, and will
  overwrite any existing files in the destination location.
  """
  url1 = 'gs://%s/%s' % (bucket_from, remote_path_from)
  url2 = 'gs://%s/%s' % (bucket_to, remote_path_to)
  # Pass lazy %-args to logging instead of pre-formatting the message.
  logging.info('Copying %s to %s', url1, url2)
  _RunCommand(['cp', url1, url2])
| 182 | |
| 183 | |
def Delete(bucket, remote_path):
  """Deletes gs://|bucket|/|remote_path|.

  Raises:
    NotFoundError if the object does not exist.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  # Pass lazy %-args to logging instead of pre-formatting the message.
  logging.info('Deleting %s', url)
  _RunCommand(['rm', url])
| 188 | |
| 189 | |
def Get(bucket, remote_path, local_path):
  """Downloads gs://|bucket|/|remote_path| to |local_path| under a lock."""
  # Serialize concurrent downloads of the same destination path.
  with _PseudoFileLock(local_path):
    _GetLocked(bucket, remote_path, local_path)
| 193 | |
| 194 | |
@contextlib.contextmanager
def _PseudoFileLock(base_path):
  """Context manager that serializes access to |base_path|.

  Creates a sibling '<base_path>.pseudo_lock' marker file as a best-effort
  cross-process mutex, and additionally takes an exclusive flock() on it
  where fcntl is available (fcntl is None on platforms without it, e.g.
  Windows — see the guarded import at the top of this file).
  """
  pseudo_lock_path = '%s.pseudo_lock' % base_path
  _CreateDirectoryIfNecessary(os.path.dirname(pseudo_lock_path))
  # This is somewhat of a racy hack because we don't have a good
  # cross-platform file lock. If we get one, this should be refactored
  # to use it.
  while os.path.exists(pseudo_lock_path):
    time.sleep(0.1)
  fd = os.open(pseudo_lock_path, os.O_RDONLY | os.O_CREAT)
  if fcntl:
    fcntl.flock(fd, fcntl.LOCK_EX)
  try:
    yield
  finally:
    # Unlock before closing/removing so waiters blocked on flock can proceed.
    if fcntl:
      fcntl.flock(fd, fcntl.LOCK_UN)
    try:
      os.close(fd)
      os.remove(pseudo_lock_path)
    except OSError:
      # We don't care if the pseudo-lock gets removed elsewhere before we have
      # a chance to do so.
      pass
| 219 | |
| 220 | |
| 221 def _CreateDirectoryIfNecessary(directory): | |
| 222 if not os.path.exists(directory): | |
| 223 os.makedirs(directory) | |
| 224 | |
| 225 | |
def _GetLocked(bucket, remote_path, local_path):
  """Downloads gs://|bucket|/|remote_path| to |local_path|.

  Assumes the caller holds the pseudo-lock for |local_path| (see
  _PseudoFileLock / Get). Downloads into a temporary file in the destination
  directory first, then moves it into place, so a failed transfer never
  leaves a truncated |local_path| behind.

  Raises:
    CloudStorageError (or a subclass) if the download fails after one retry.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  # Pass lazy %-args to logging instead of pre-formatting the message.
  logging.info('Downloading %s to %s', url, local_path)
  _CreateDirectoryIfNecessary(os.path.dirname(local_path))
  with tempfile.NamedTemporaryFile(
      dir=os.path.dirname(local_path),
      delete=False) as partial_download_path:
    try:
      # Windows won't download to an open file.
      partial_download_path.close()
      try:
        _RunCommand(['cp', url, partial_download_path.name])
      except ServerError:
        # Transient server errors happen; retry the download once.
        logging.info('Cloud Storage server error, retrying download')
        _RunCommand(['cp', url, partial_download_path.name])
      shutil.move(partial_download_path.name, local_path)
    finally:
      # Remove the partial file if the download or the move failed.
      if os.path.exists(partial_download_path.name):
        os.remove(partial_download_path.name)
| 245 | |
| 246 | |
def Insert(bucket, remote_path, local_path, publicly_readable=False):
  """ Upload file in |local_path| to cloud storage.
  Args:
    bucket: the google cloud storage bucket name.
    remote_path: the remote file path in |bucket|.
    local_path: path of the local file to be uploaded.
    publicly_readable: whether the uploaded file has publicly readable
    permission.

  Returns:
    The url where the file is uploaded to.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  command_and_args = ['cp']
  extra_info = ''
  if publicly_readable:
    # '-a public-read' sets a canned ACL on the uploaded object.
    command_and_args += ['-a', 'public-read']
    extra_info = ' (publicly readable)'
  command_and_args += [local_path, url]
  # Pass lazy %-args to logging instead of pre-formatting the message.
  logging.info('Uploading %s to %s%s', local_path, url, extra_info)
  _RunCommand(command_and_args)
  return 'https://console.developers.google.com/m/cloudstorage/b/%s/o/%s' % (
      bucket, remote_path)
| 270 | |
| 271 | |
def GetIfHashChanged(cs_path, download_path, bucket, file_hash):
  """Downloads |download_path| to |file_path| if |file_path| doesn't exist or
  it's hash doesn't match |file_hash|.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _PseudoFileLock(download_path):
    local_is_current = (os.path.exists(download_path) and
                        CalculateHash(download_path) == file_hash)
    if local_is_current:
      return False
    _GetLocked(bucket, cs_path, download_path)
    return True
| 289 | |
| 290 | |
def GetIfChanged(file_path, bucket):
  """Gets the file at file_path if it has a hash file that doesn't match or
  if there is no local copy of file_path, but there is a hash file for it.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _PseudoFileLock(file_path):
    hash_path = file_path + '.sha1'
    if not os.path.exists(hash_path):
      logging.warning('Hash file not found: %s' % hash_path)
      return False

    expected_hash = ReadHash(hash_path)
    if os.path.exists(file_path):
      if CalculateHash(file_path) == expected_hash:
        return False
    # Remote objects are keyed by their content hash.
    _GetLocked(bucket, expected_hash, file_path)
    return True
| 313 | |
| 314 | |
def GetFilesInDirectoryIfChanged(directory, bucket):
  """ Scan the directory for .sha1 files, and download them from the given
  bucket in cloud storage if the local and remote hash don't match or
  there is no local copy.
  """
  if not os.path.isdir(directory):
    raise ValueError('Must provide a valid directory.')
  # Don't allow the root directory to be a serving_dir.
  if directory == os.path.abspath(os.sep):
    raise ValueError('Trying to serve root directory from HTTP server.')
  for dirpath, _, filenames in os.walk(directory):
    for filename in filenames:
      stem, extension = os.path.splitext(os.path.join(dirpath, filename))
      if extension == '.sha1':
        GetIfChanged(stem, bucket)
| 332 | |
def CalculateHash(file_path):
  """Calculates and returns the hash of the file at file_path."""
  digest = hashlib.sha1()
  with open(file_path, 'rb') as f:
    # Read in 1mb chunks, so it doesn't all have to be loaded into memory.
    for chunk in iter(lambda: f.read(1024 * 1024), b''):
      digest.update(chunk)
  return digest.hexdigest()
| 344 | |
| 345 | |
def ReadHash(hash_path):
  """Returns the hash stored in |hash_path|, stripped of trailing whitespace.

  Reads at most 1024 bytes; a SHA-1 hex digest is only 40 characters.
  """
  with open(hash_path, 'rb') as hash_file:
    return hash_file.read(1024).rstrip()
| OLD | NEW |