# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Wrappers for gsutil, for basic interaction with Google Cloud Storage."""

import collections
import contextlib
import hashlib
import logging
import os
import shutil
import stat
import subprocess
import sys
import tempfile
import time

try:
  import fcntl
except ImportError:
  fcntl = None

from catapult_base import util


PUBLIC_BUCKET = 'chromium-telemetry'
PARTNER_BUCKET = 'chrome-partner-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'
TELEMETRY_OUTPUT = 'chrome-telemetry-output'

# Use an ordered dict so that the bucket aliases are listed from the most
# open bucket to the most restrictive one.
BUCKET_ALIASES = collections.OrderedDict((
    ('public', PUBLIC_BUCKET),
    ('partner', PARTNER_BUCKET),
    ('internal', INTERNAL_BUCKET),
    ('output', TELEMETRY_OUTPUT),
))

BUCKET_ALIAS_NAMES = BUCKET_ALIASES.keys()
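
# A minimal sketch of how a caller might resolve a user-supplied bucket
# argument (|bucket_arg| is hypothetical, not part of this module): try the
# alias table first, falling back to treating the input as a literal bucket
# name:
#
#   bucket = BUCKET_ALIASES.get(bucket_arg, bucket_arg)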


_GSUTIL_PATH = os.path.join(util.GetCatapultDir(), 'third_party', 'gsutil',
                            'gsutil')

# TODO(tbarzic): A workaround for http://crbug.com/386416 and
# http://crbug.com/359293. See |_RunCommand|.
_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'


class CloudStorageError(Exception):
  @staticmethod
  def _GetConfigInstructions():
    command = _GSUTIL_PATH
    if util.IsRunningOnCrosDevice():
      command = 'HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, _GSUTIL_PATH)
    return ('To configure your credentials:\n'
            '  1. Run "%s config" and follow its instructions.\n'
            '  2. If you have a @google.com account, use that account.\n'
            '  3. For the project-id, just enter 0.' % command)


class PermissionError(CloudStorageError):
  def __init__(self):
    super(PermissionError, self).__init__(
        'Attempted to access a file from Cloud Storage but you don\'t '
        'have permission. ' + self._GetConfigInstructions())


class CredentialsError(CloudStorageError):
  def __init__(self):
    super(CredentialsError, self).__init__(
        'Attempted to access a file from Cloud Storage but you have no '
        'configured credentials. ' + self._GetConfigInstructions())


class NotFoundError(CloudStorageError):
  pass


class ServerError(CloudStorageError):
  pass


# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
  search_paths = list(extra_search_paths) + os.environ['PATH'].split(os.pathsep)
  for search_path in search_paths:
    executable_path = os.path.join(search_path, relative_executable_path)
    if util.IsExecutable(executable_path):
      return executable_path
  return None


def _EnsureExecutable(gsutil):
  """chmod +x if gsutil is not executable."""
  st = os.stat(gsutil)
  if not st.st_mode & stat.S_IEXEC:
    os.chmod(gsutil, st.st_mode | stat.S_IEXEC)


def _RunCommand(args):
  # On CrOS devices, Telemetry runs as root, so HOME is set to /root/, which
  # is not writable. gsutil tries to create a download tracker dir under HOME
  # and fails. To avoid this, override HOME with a writable directory when
  # running on a CrOS device.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  # http://crbug.com/386416, http://crbug.com/359293.
  gsutil_env = None
  if util.IsRunningOnCrosDevice():
    gsutil_env = os.environ.copy()
    gsutil_env['HOME'] = _CROS_GSUTIL_HOME_WAR

  if os.name == 'nt':
    # If Windows, prepend python. Python scripts aren't directly executable.
    args = [sys.executable, _GSUTIL_PATH] + args
  else:
    # Don't do it on POSIX, in case someone is using a shell script to redirect.
    args = [_GSUTIL_PATH] + args
    _EnsureExecutable(_GSUTIL_PATH)

  gsutil = subprocess.Popen(args, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, env=gsutil_env)
  stdout, stderr = gsutil.communicate()

  if gsutil.returncode:
    if stderr.startswith((
        'You are attempting to access protected data with no configured',
        'Failure: No handler was ready to authenticate.')):
      raise CredentialsError()
    if ('status=403' in stderr or 'status 403' in stderr or
        '403 Forbidden' in stderr):
      raise PermissionError()
    if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
        'No URLs matched' in stderr or 'One or more URLs matched no' in stderr):
      raise NotFoundError(stderr)
    if '500 Internal Server Error' in stderr:
      raise ServerError(stderr)
    raise CloudStorageError(stderr)

  return stdout


def List(bucket):
  query = 'gs://%s/' % bucket
  stdout = _RunCommand(['ls', query])
  return [url[len(query):] for url in stdout.splitlines()]


def Exists(bucket, remote_path):
  try:
    _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
    return True
  except NotFoundError:
    return False
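
# A quick usage sketch for the read-only helpers above (the remote path is
# illustrative; assumes credentials with access to the public bucket):
#
#   for name in List(PUBLIC_BUCKET):
#     logging.info('found %s', name)
#   if Exists(PUBLIC_BUCKET, 'some/remote/path'):
#     logging.info('object is present')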


def Move(bucket1, bucket2, remote_path):
  url1 = 'gs://%s/%s' % (bucket1, remote_path)
  url2 = 'gs://%s/%s' % (bucket2, remote_path)
  logging.info('Moving %s to %s', url1, url2)
  _RunCommand(['mv', url1, url2])


def Copy(bucket_from, bucket_to, remote_path_from, remote_path_to):
  """Copies a file from one location in Cloud Storage to another.

  Args:
    bucket_from: The cloud storage bucket where the file is currently located.
    bucket_to: The cloud storage bucket it is being copied to.
    remote_path_from: The file path where the file is located in bucket_from.
    remote_path_to: The file path it is being copied to in bucket_to.

  Copying makes no changes locally or to the source file, and it overwrites
  any existing file at the destination.
  """
  url1 = 'gs://%s/%s' % (bucket_from, remote_path_from)
  url2 = 'gs://%s/%s' % (bucket_to, remote_path_to)
  logging.info('Copying %s to %s', url1, url2)
  _RunCommand(['cp', url1, url2])
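
# For example (hypothetical object name), mirroring an object between two of
# the buckets defined above:
#
#   Copy(PUBLIC_BUCKET, INTERNAL_BUCKET, 'bundles/data.json',
#        'bundles/data.json')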


def Delete(bucket, remote_path):
  url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Deleting %s', url)
  _RunCommand(['rm', url])


def Get(bucket, remote_path, local_path):
  with _PseudoFileLock(local_path):
    _GetLocked(bucket, remote_path, local_path)


@contextlib.contextmanager
def _PseudoFileLock(base_path):
  pseudo_lock_path = '%s.pseudo_lock' % base_path
  _CreateDirectoryIfNecessary(os.path.dirname(pseudo_lock_path))
  # This is somewhat of a racy hack because we don't have a good
  # cross-platform file lock. If we get one, this should be refactored
  # to use it.
  while os.path.exists(pseudo_lock_path):
    time.sleep(0.1)
  fd = os.open(pseudo_lock_path, os.O_RDONLY | os.O_CREAT)
  if fcntl:
    fcntl.flock(fd, fcntl.LOCK_EX)
  try:
    yield
  finally:
    if fcntl:
      fcntl.flock(fd, fcntl.LOCK_UN)
    try:
      os.close(fd)
      os.remove(pseudo_lock_path)
    except OSError:
      # We don't care if the pseudo-lock gets removed elsewhere before we have
      # a chance to do so.
      pass


def _CreateDirectoryIfNecessary(directory):
  if not os.path.exists(directory):
    os.makedirs(directory)


def _GetLocked(bucket, remote_path, local_path):
  url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Downloading %s to %s', url, local_path)
  _CreateDirectoryIfNecessary(os.path.dirname(local_path))
  with tempfile.NamedTemporaryFile(
      dir=os.path.dirname(local_path),
      delete=False) as partial_download_path:
    try:
      # Windows won't download to an open file.
      partial_download_path.close()
      try:
        _RunCommand(['cp', url, partial_download_path.name])
      except ServerError:
        logging.info('Cloud Storage server error, retrying download')
        _RunCommand(['cp', url, partial_download_path.name])
      shutil.move(partial_download_path.name, local_path)
    finally:
      if os.path.exists(partial_download_path.name):
        os.remove(partial_download_path.name)


def Insert(bucket, remote_path, local_path, publicly_readable=False):
  """Uploads the file at |local_path| to Cloud Storage.

  Args:
    bucket: the google cloud storage bucket name.
    remote_path: the remote file path in |bucket|.
    local_path: path of the local file to be uploaded.
    publicly_readable: whether the uploaded file has publicly readable
        permission.

  Returns:
    The URL where the file was uploaded to.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  command_and_args = ['cp']
  extra_info = ''
  if publicly_readable:
    command_and_args += ['-a', 'public-read']
    extra_info = ' (publicly readable)'
  command_and_args += [local_path, url]
  logging.info('Uploading %s to %s%s', local_path, url, extra_info)
  _RunCommand(command_and_args)
  return 'https://console.developers.google.com/m/cloudstorage/b/%s/o/%s' % (
      bucket, remote_path)
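
# A minimal usage sketch for Insert() (hypothetical paths; assumes write
# access to the bucket). Note that the returned link is a Developers Console
# browsing URL, not a gs:// URL:
#
#   view_url = Insert(PUBLIC_BUCKET, 'bundles/data.json', '/tmp/data.json',
#                     publicly_readable=True)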


def GetIfHashChanged(cs_path, download_path, bucket, file_hash):
  """Downloads |cs_path| from |bucket| to |download_path| if |download_path|
  does not exist or its hash does not match |file_hash|.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _PseudoFileLock(download_path):
    if (os.path.exists(download_path) and
        CalculateHash(download_path) == file_hash):
      return False
    _GetLocked(bucket, cs_path, download_path)
    return True
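
# The explicit-hash variant above can be used like this (hypothetical values):
# /tmp/dep is re-downloaded only if it is missing or its SHA-1 differs from
# |expected_hash|:
#
#   changed = GetIfHashChanged('deps/dep', '/tmp/dep', PUBLIC_BUCKET,
#                              expected_hash)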


def GetIfChanged(file_path, bucket):
  """Gets the file at |file_path| if its .sha1 hash file doesn't match the
  local copy, or if there is no local copy of |file_path| but there is a
  hash file for it.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _PseudoFileLock(file_path):
    hash_path = file_path + '.sha1'
    if not os.path.exists(hash_path):
      logging.warning('Hash file not found: %s', hash_path)
      return False

    expected_hash = ReadHash(hash_path)
    if os.path.exists(file_path) and CalculateHash(file_path) == expected_hash:
      return False
    _GetLocked(bucket, expected_hash, file_path)
    return True
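
# A sketch of the .sha1 layout GetIfChanged() expects (hypothetical paths).
# The remote object is stored under its own SHA-1, and a local sibling hash
# file names that object:
#
#   bin/dep        <- downloaded binary (may be absent initially)
#   bin/dep.sha1   <- text file holding the SHA-1 of bin/dep
#
#   if GetIfChanged('bin/dep', PUBLIC_BUCKET):
#     logging.info('bin/dep was stale and has been re-downloaded')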


def GetFilesInDirectoryIfChanged(directory, bucket):
  """Scans |directory| for .sha1 files and downloads the corresponding files
  from the given bucket in Cloud Storage if the local and remote hashes don't
  match or there is no local copy.
  """
  if not os.path.isdir(directory):
    raise ValueError('Must provide a valid directory.')
  # Don't allow the root directory to be a serving_dir.
  if directory == os.path.abspath(os.sep):
    raise ValueError('Trying to serve root directory from HTTP server.')
  for dirpath, _, filenames in os.walk(directory):
    for filename in filenames:
      path_name, extension = os.path.splitext(
          os.path.join(dirpath, filename))
      if extension != '.sha1':
        continue
      GetIfChanged(path_name, bucket)


def CalculateHash(file_path):
  """Calculates and returns the hash of the file at file_path."""
  sha1 = hashlib.sha1()
  with open(file_path, 'rb') as f:
    while True:
      # Read in 1 MB chunks, so the file doesn't have to fit in memory.
      chunk = f.read(1024 * 1024)
      if not chunk:
        break
      sha1.update(chunk)
  return sha1.hexdigest()


def ReadHash(hash_path):
  """Returns the hash stored in the given .sha1 file (at most 1024 bytes)."""
  with open(hash_path, 'rb') as f:
    return f.read(1024).rstrip()
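
# A sketch of the matching upload side (hypothetical paths; assumes write
# access to the bucket): upload the file keyed by its own hash, then record
# that hash in a sibling .sha1 file so GetIfChanged() can fetch it later:
#
#   file_hash = CalculateHash('bin/dep')
#   Insert(PUBLIC_BUCKET, file_hash, 'bin/dep')
#   with open('bin/dep.sha1', 'w') as f:
#     f.write(file_hash)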