Index: tools/telemetry/catapult_base/cloud_storage.py
diff --git a/tools/telemetry/catapult_base/cloud_storage.py b/tools/telemetry/catapult_base/cloud_storage.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe8a2cb64f4e6db21914d968521b98865f4369d1
--- /dev/null
+++ b/tools/telemetry/catapult_base/cloud_storage.py
@@ -0,0 +1,348 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Wrappers for gsutil, for basic interaction with Google Cloud Storage."""
+
+import collections
+import contextlib
+import hashlib
+import logging
+import os
+import shutil
+import stat
+import subprocess
+import sys
+import tempfile
+import time
+
+try:
+  import fcntl
+except ImportError:
+  fcntl = None
+
+from catapult_base import util
+
+
+PUBLIC_BUCKET = 'chromium-telemetry'
+PARTNER_BUCKET = 'chrome-partner-telemetry'
+INTERNAL_BUCKET = 'chrome-telemetry'
+TELEMETRY_OUTPUT = 'chrome-telemetry-output'
+
+# Use an ordered dict so that the bucket aliases are listed from the most
+# open to the most restrictive.
+BUCKET_ALIASES = collections.OrderedDict((
+    ('public', PUBLIC_BUCKET),
+    ('partner', PARTNER_BUCKET),
+    ('internal', INTERNAL_BUCKET),
+    ('output', TELEMETRY_OUTPUT),
+))
+
+BUCKET_ALIAS_NAMES = BUCKET_ALIASES.keys()
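+
+# A caller that accepts one of these aliases on its command line can resolve
+# it to a real bucket name like this (a usage sketch; |bucket_arg| is a
+# hypothetical flag value, not part of this module):
+#   bucket = BUCKET_ALIASES.get(bucket_arg, bucket_arg)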
+
+
+_GSUTIL_PATH = os.path.join(util.GetCatapultDir(), 'third_party', 'gsutil',
+                            'gsutil')
+
+# TODO(tbarzic): A workaround for http://crbug.com/386416 and
+# http://crbug.com/359293. See |_RunCommand|.
+_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'
+
+
+class CloudStorageError(Exception):
+  @staticmethod
+  def _GetConfigInstructions():
+    command = _GSUTIL_PATH
+    if util.IsRunningOnCrosDevice():
+      command = 'HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, _GSUTIL_PATH)
+    return ('To configure your credentials:\n'
+            '  1. Run "%s config" and follow its instructions.\n'
+            '  2. If you have a @google.com account, use that account.\n'
+            '  3. For the project-id, just enter 0.' % command)
+
+
+class PermissionError(CloudStorageError):
+  def __init__(self):
+    super(PermissionError, self).__init__(
+        'Attempted to access a file from Cloud Storage but you don\'t '
+        'have permission. ' + self._GetConfigInstructions())
+
+
+class CredentialsError(CloudStorageError):
+  def __init__(self):
+    super(CredentialsError, self).__init__(
+        'Attempted to access a file from Cloud Storage but you have no '
+        'configured credentials. ' + self._GetConfigInstructions())
+
+
+class NotFoundError(CloudStorageError):
+  pass
+
+
+class ServerError(CloudStorageError):
+  pass
+
+
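+# Callers can distinguish these failures when fetching files. A minimal
+# sketch, assuming |bucket|, |remote_path|, and |local_path| are supplied by
+# the caller:
+#   try:
+#     Get(bucket, remote_path, local_path)
+#   except CredentialsError:
+#     pass  # Prompt the user to run "gsutil config".
+#   except PermissionError:
+#     pass  # The account lacks access to |bucket|.
+#   except NotFoundError:
+#     pass  # The object does not exist in |bucket|.
+
+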
+# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
+def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
+  search_paths = list(extra_search_paths) + os.environ['PATH'].split(os.pathsep)
+  for search_path in search_paths:
+    executable_path = os.path.join(search_path, relative_executable_path)
+    if util.IsExecutable(executable_path):
+      return executable_path
+  return None
+
+
+def _EnsureExecutable(gsutil):
+  """chmod +x if gsutil is not executable."""
+  st = os.stat(gsutil)
+  if not st.st_mode & stat.S_IEXEC:
+    os.chmod(gsutil, st.st_mode | stat.S_IEXEC)
+
+
+def _RunCommand(args):
+  # On a CrOS device, telemetry runs as root, so HOME is set to /root/, which
+  # is not writable. gsutil tries to create a download tracker dir under HOME
+  # and fails. To avoid this, override HOME with something writable when
+  # running on a CrOS device.
+  #
+  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
+  # http://crbug.com/386416, http://crbug.com/359293.
+  gsutil_env = None
+  if util.IsRunningOnCrosDevice():
+    gsutil_env = os.environ.copy()
+    gsutil_env['HOME'] = _CROS_GSUTIL_HOME_WAR
+
+  if os.name == 'nt':
+    # On Windows, prepend the Python interpreter; Python scripts aren't
+    # directly executable.
+    args = [sys.executable, _GSUTIL_PATH] + args
+  else:
+    # Don't do this on POSIX, in case someone is using a shell script to
+    # redirect.
+    args = [_GSUTIL_PATH] + args
+    _EnsureExecutable(_GSUTIL_PATH)
+
+  gsutil = subprocess.Popen(args, stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE, env=gsutil_env)
+  stdout, stderr = gsutil.communicate()
+
+  if gsutil.returncode:
+    if stderr.startswith((
+        'You are attempting to access protected data with no configured',
+        'Failure: No handler was ready to authenticate.')):
+      raise CredentialsError()
+    if ('status=403' in stderr or 'status 403' in stderr or
+        '403 Forbidden' in stderr):
+      raise PermissionError()
+    if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
+        'No URLs matched' in stderr or 'One or more URLs matched no' in stderr):
+      raise NotFoundError(stderr)
+    if '500 Internal Server Error' in stderr:
+      raise ServerError(stderr)
+    raise CloudStorageError(stderr)
+
+  return stdout
+
+
+def List(bucket):
+  query = 'gs://%s/' % bucket
+  stdout = _RunCommand(['ls', query])
+  return [url[len(query):] for url in stdout.splitlines()]
+
+
+def Exists(bucket, remote_path):
+  try:
+    _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
+    return True
+  except NotFoundError:
+    return False
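+
+# Usage sketch (the object name is hypothetical):
+#   names = List(PUBLIC_BUCKET)  # Paths relative to 'gs://<bucket>/'.
+#   if Exists(PUBLIC_BUCKET, 'data/foo.wpr'):
+#     ...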
+
+
+def Move(bucket1, bucket2, remote_path):
+  url1 = 'gs://%s/%s' % (bucket1, remote_path)
+  url2 = 'gs://%s/%s' % (bucket2, remote_path)
+  logging.info('Moving %s to %s', url1, url2)
+  _RunCommand(['mv', url1, url2])
+
+
+def Copy(bucket_from, bucket_to, remote_path_from, remote_path_to):
+  """Copy a file from one location in CloudStorage to another.
+
+  Args:
+    bucket_from: The cloud storage bucket where the file is currently located.
+    bucket_to: The cloud storage bucket it is being copied to.
+    remote_path_from: The file path where the file is located in bucket_from.
+    remote_path_to: The file path it is being copied to in bucket_to.
+
+  This copy causes no local changes and does not modify the source file, but
+  it will overwrite any existing file at the destination.
+  """
+  url1 = 'gs://%s/%s' % (bucket_from, remote_path_from)
+  url2 = 'gs://%s/%s' % (bucket_to, remote_path_to)
+  logging.info('Copying %s to %s', url1, url2)
+  _RunCommand(['cp', url1, url2])
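+
+# For example, copying an object between buckets (a sketch; the paths are
+# hypothetical):
+#   Copy(INTERNAL_BUCKET, PUBLIC_BUCKET, 'data/foo.wpr', 'data/foo.wpr')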
+
+
+def Delete(bucket, remote_path):
+  url = 'gs://%s/%s' % (bucket, remote_path)
+  logging.info('Deleting %s', url)
+  _RunCommand(['rm', url])
+
+
+def Get(bucket, remote_path, local_path):
+  with _PseudoFileLock(local_path):
+    _GetLocked(bucket, remote_path, local_path)
+
+
+@contextlib.contextmanager
+def _PseudoFileLock(base_path):
+  pseudo_lock_path = '%s.pseudo_lock' % base_path
+  _CreateDirectoryIfNecessary(os.path.dirname(pseudo_lock_path))
+  # This is somewhat of a racy hack because we don't have a good
+  # cross-platform file lock. If we get one, this should be refactored
+  # to use it.
+  while os.path.exists(pseudo_lock_path):
+    time.sleep(0.1)
+  fd = os.open(pseudo_lock_path, os.O_RDONLY | os.O_CREAT)
+  if fcntl:
+    fcntl.flock(fd, fcntl.LOCK_EX)
+  try:
+    yield
+  finally:
+    if fcntl:
+      fcntl.flock(fd, fcntl.LOCK_UN)
+    try:
+      os.close(fd)
+      os.remove(pseudo_lock_path)
+    except OSError:
+      # We don't care if the pseudo-lock gets removed elsewhere before we have
+      # a chance to do so.
+      pass
+
+
+def _CreateDirectoryIfNecessary(directory):
+  if not os.path.exists(directory):
+    os.makedirs(directory)
+
+
+def _GetLocked(bucket, remote_path, local_path):
+  url = 'gs://%s/%s' % (bucket, remote_path)
+  logging.info('Downloading %s to %s', url, local_path)
+  _CreateDirectoryIfNecessary(os.path.dirname(local_path))
+  with tempfile.NamedTemporaryFile(
+      dir=os.path.dirname(local_path),
+      delete=False) as partial_download_path:
+    try:
+      # Windows won't download to an open file.
+      partial_download_path.close()
+      try:
+        _RunCommand(['cp', url, partial_download_path.name])
+      except ServerError:
+        logging.info('Cloud Storage server error, retrying download')
+        _RunCommand(['cp', url, partial_download_path.name])
+      shutil.move(partial_download_path.name, local_path)
+    finally:
+      if os.path.exists(partial_download_path.name):
+        os.remove(partial_download_path.name)
+
+
+def Insert(bucket, remote_path, local_path, publicly_readable=False):
+  """Uploads the file at |local_path| to Cloud Storage.
+
+  Args:
+    bucket: the Google Cloud Storage bucket name.
+    remote_path: the remote file path in |bucket|.
+    local_path: path of the local file to be uploaded.
+    publicly_readable: whether the uploaded file has publicly readable
+        permission.
+
+  Returns:
+    The URL where the file was uploaded.
+  """
+  url = 'gs://%s/%s' % (bucket, remote_path)
+  command_and_args = ['cp']
+  extra_info = ''
+  if publicly_readable:
+    command_and_args += ['-a', 'public-read']
+    extra_info = ' (publicly readable)'
+  command_and_args += [local_path, url]
+  logging.info('Uploading %s to %s%s', local_path, url, extra_info)
+  _RunCommand(command_and_args)
+  return 'https://console.developers.google.com/m/cloudstorage/b/%s/o/%s' % (
+      bucket, remote_path)
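+
+# Usage sketch (the local file and object name are hypothetical):
+#   view_url = Insert(PUBLIC_BUCKET, 'builds/foo.zip', '/tmp/foo.zip',
+#                     publicly_readable=True)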
+
+
+def GetIfHashChanged(cs_path, download_path, bucket, file_hash):
+  """Downloads |cs_path| from |bucket| to |download_path| if |download_path|
+  doesn't exist or its hash doesn't match |file_hash|.
+
+  Returns:
+    True if the binary was changed.
+  Raises:
+    CredentialsError if the user has no configured credentials.
+    PermissionError if the user does not have permission to access the bucket.
+    NotFoundError if the file is not in the given bucket in cloud_storage.
+  """
+  with _PseudoFileLock(download_path):
+    if (os.path.exists(download_path) and
+        CalculateHash(download_path) == file_hash):
+      return False
+    _GetLocked(bucket, cs_path, download_path)
+    return True
+
+
+def GetIfChanged(file_path, bucket):
+  """Gets the file at |file_path| if there is a hash file for it whose hash
+  doesn't match the local copy, or if there is a hash file but no local copy.
+
+  Returns:
+    True if the binary was changed.
+  Raises:
+    CredentialsError if the user has no configured credentials.
+    PermissionError if the user does not have permission to access the bucket.
+    NotFoundError if the file is not in the given bucket in cloud_storage.
+  """
+  with _PseudoFileLock(file_path):
+    hash_path = file_path + '.sha1'
+    if not os.path.exists(hash_path):
+      logging.warning('Hash file not found: %s', hash_path)
+      return False
+
+    expected_hash = ReadHash(hash_path)
+    if os.path.exists(file_path) and CalculateHash(file_path) == expected_hash:
+      return False
+    _GetLocked(bucket, expected_hash, file_path)
+    return True
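+
+# Note that the remote object is addressed by its hash: this downloads
+# gs://<bucket>/<sha1 from the hash file> to |file_path|. A usage sketch
+# (the local path is hypothetical):
+#   changed = GetIfChanged('page_sets/data/foo.wpr', PUBLIC_BUCKET)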
+
+
+def GetFilesInDirectoryIfChanged(directory, bucket):
+  """Scans the directory for .sha1 files and downloads the corresponding
+  files from the given bucket in cloud storage if the local and remote
+  hashes don't match or there is no local copy.
+  """
+  if not os.path.isdir(directory):
+    raise ValueError('Must provide a valid directory.')
+  # Don't allow the root directory to be a serving_dir.
+  if directory == os.path.abspath(os.sep):
+    raise ValueError('Trying to serve root directory from HTTP server.')
+  for dirpath, _, filenames in os.walk(directory):
+    for filename in filenames:
+      path_name, extension = os.path.splitext(
+          os.path.join(dirpath, filename))
+      if extension != '.sha1':
+        continue
+      GetIfChanged(path_name, bucket)
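+
+# For example, given a directory containing 'data/foo.wpr.sha1' (a
+# hypothetical layout), this fetches 'data/foo.wpr' from the bucket:
+#   GetFilesInDirectoryIfChanged('page_sets', PUBLIC_BUCKET)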
+
+
+def CalculateHash(file_path):
+  """Calculates and returns the hash of the file at file_path."""
+  sha1 = hashlib.sha1()
+  with open(file_path, 'rb') as f:
+    while True:
+      # Read in 1mb chunks, so it doesn't all have to be loaded into memory.
+      chunk = f.read(1024 * 1024)
+      if not chunk:
+        break
+      sha1.update(chunk)
+  return sha1.hexdigest()
+
+
+def ReadHash(hash_path):
+  with open(hash_path, 'rb') as f:
+    return f.read(1024).rstrip()
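+
+# The .sha1 workflow ties these helpers together. A minimal sketch, assuming
+# |path| names a hypothetical local file:
+#   file_hash = CalculateHash(path)
+#   with open(path + '.sha1', 'wb') as f:
+#     f.write(file_hash)
+#   Insert(PUBLIC_BUCKET, file_hash, path)  # Objects are named by hash.
+#   GetIfChanged(path, PUBLIC_BUCKET)       # Later: re-fetch only if stale.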