Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(149)

Unified Diff: tools/telemetry/catapult_base/cloud_storage.py

Issue 1620023002: Revert of Remove catapult_base from telemetry. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@perf_cb_move
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: tools/telemetry/catapult_base/cloud_storage.py
diff --git a/tools/telemetry/catapult_base/cloud_storage.py b/tools/telemetry/catapult_base/cloud_storage.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe8a2cb64f4e6db21914d968521b98865f4369d1
--- /dev/null
+++ b/tools/telemetry/catapult_base/cloud_storage.py
@@ -0,0 +1,348 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Wrappers for gsutil, for basic interaction with Google Cloud Storage."""
+
+import collections
+import contextlib
+import hashlib
+import logging
+import os
+import shutil
+import stat
+import subprocess
+import sys
+import tempfile
+import time
+
+try:
+ import fcntl
+except ImportError:
+ fcntl = None
+
+from catapult_base import util
+
+
# Well-known Cloud Storage bucket names used by telemetry.
PUBLIC_BUCKET = 'chromium-telemetry'
PARTNER_BUCKET = 'chrome-partner-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'
TELEMETRY_OUTPUT = 'chrome-telemetry-output'

# Uses ordered dict to make sure that bucket's key-value items are ordered from
# the most open to the most restrictive.
BUCKET_ALIASES = collections.OrderedDict((
    ('public', PUBLIC_BUCKET),
    ('partner', PARTNER_BUCKET),
    ('internal', INTERNAL_BUCKET),
    ('output', TELEMETRY_OUTPUT),
))

# Alias names in the same most-open-first order as BUCKET_ALIASES.
BUCKET_ALIAS_NAMES = BUCKET_ALIASES.keys()


# Path of the gsutil script bundled in the catapult checkout.
_GSUTIL_PATH = os.path.join(util.GetCatapultDir(), 'third_party', 'gsutil',
                            'gsutil')

# TODO(tbarzic): A workaround for http://crbug.com/386416 and
# http://crbug.com/359293. See |_RunCommand|.
_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'
+
+
+
class CloudStorageError(Exception):
  """Base class for errors raised by failed gsutil invocations."""

  @staticmethod
  def _GetConfigInstructions():
    """Return human-readable steps for configuring gsutil credentials."""
    if util.IsRunningOnCrosDevice():
      # On cros, gsutil must be invoked with a writable HOME (see
      # _RunCommand), so show the command the same way it is run.
      command = 'HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, _GSUTIL_PATH)
    else:
      command = _GSUTIL_PATH
    return ('To configure your credentials:\n'
            '  1. Run "%s config" and follow its instructions.\n'
            '  2. If you have a @google.com account, use that account.\n'
            '  3. For the project-id, just enter 0.' % command)
+
+
class PermissionError(CloudStorageError):
  """Raised when gsutil reports a 403 (access denied) for the request.

  NOTE(review): this name shadows Python 3's builtin PermissionError; it is
  kept because external callers catch it via this module's namespace.
  """
  def __init__(self):
    super(PermissionError, self).__init__(
        'Attempted to access a file from Cloud Storage but you don\'t '
        'have permission. ' + self._GetConfigInstructions())
+
+
class CredentialsError(CloudStorageError):
  """Raised when gsutil reports that no credentials are configured."""
  def __init__(self):
    super(CredentialsError, self).__init__(
        'Attempted to access a file from Cloud Storage but you have no '
        'configured credentials. ' + self._GetConfigInstructions())
+
+
class NotFoundError(CloudStorageError):
  """Raised when the requested object or URL does not exist in the bucket."""
  pass
+
+
class ServerError(CloudStorageError):
  """Raised when gsutil reports a 500 Internal Server Error (retryable)."""
  pass
+
+
# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
  """Locate an executable, searching |extra_search_paths| before $PATH.

  Returns the first matching executable path, or None if none is found.
  """
  candidate_dirs = list(extra_search_paths)
  candidate_dirs.extend(os.environ['PATH'].split(os.pathsep))
  for candidate_dir in candidate_dirs:
    candidate = os.path.join(candidate_dir, relative_executable_path)
    if util.IsExecutable(candidate):
      return candidate
  return None
+
+def _EnsureExecutable(gsutil):
+ """chmod +x if gsutil is not executable."""
+ st = os.stat(gsutil)
+ if not st.st_mode & stat.S_IEXEC:
+ os.chmod(gsutil, st.st_mode | stat.S_IEXEC)
+
def _RunCommand(args):
  """Run gsutil with |args| and return its stdout.

  When gsutil exits non-zero, its stderr is matched against well-known
  failure messages and the corresponding CloudStorageError subclass is
  raised; unrecognized failures raise a plain CloudStorageError.
  """
  # On cros device, as telemetry is running as root, home will be set to
  # /root/, which is not writable. gsutil will attempt to create a download
  # tracker dir in home dir and fail. To avoid this, override HOME dir to
  # something writable when running on cros device.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  # http://crbug.com/386416, http://crbug.com/359293.
  env = None
  if util.IsRunningOnCrosDevice():
    env = os.environ.copy()
    env['HOME'] = _CROS_GSUTIL_HOME_WAR

  if os.name == 'nt':
    # If Windows, prepend python. Python scripts aren't directly executable.
    command = [sys.executable, _GSUTIL_PATH] + args
  else:
    # Don't do it on POSIX, in case someone is using a shell script to
    # redirect.
    command = [_GSUTIL_PATH] + args
    _EnsureExecutable(_GSUTIL_PATH)

  process = subprocess.Popen(command, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, env=env)
  stdout, stderr = process.communicate()

  if process.returncode:
    # Classify the failure from stderr, most specific message first.
    if stderr.startswith((
        'You are attempting to access protected data with no configured',
        'Failure: No handler was ready to authenticate.')):
      raise CredentialsError()
    if ('status=403' in stderr or 'status 403' in stderr or
        '403 Forbidden' in stderr):
      raise PermissionError()
    if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
        'No URLs matched' in stderr or 'One or more URLs matched no' in stderr):
      raise NotFoundError(stderr)
    if '500 Internal Server Error' in stderr:
      raise ServerError(stderr)
    raise CloudStorageError(stderr)

  return stdout
+
+
def List(bucket):
  """Return the object paths in |bucket|, relative to the bucket root."""
  prefix = 'gs://%s/' % bucket
  listing = _RunCommand(['ls', prefix])
  return [line[len(prefix):] for line in listing.splitlines()]
+
+
def Exists(bucket, remote_path):
  """Return True iff gs://bucket/remote_path exists.

  NotFoundError is mapped to False; other errors (e.g. CredentialsError)
  propagate to the caller.
  """
  try:
    _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
  except NotFoundError:
    return False
  return True
+
+
def Move(bucket1, bucket2, remote_path):
  """Move |remote_path| from |bucket1| to the same path in |bucket2|."""
  source = 'gs://%s/%s' % (bucket1, remote_path)
  destination = 'gs://%s/%s' % (bucket2, remote_path)
  logging.info('Moving %s to %s' % (source, destination))
  _RunCommand(['mv', source, destination])
+
+
def Copy(bucket_from, bucket_to, remote_path_from, remote_path_to):
  """Copy a file from one Cloud Storage location to another.

  Causes no local changes and leaves the source object intact; any existing
  file at the destination is overwritten.

  Args:
    bucket_from: Bucket currently holding the file.
    bucket_to: Bucket the file is copied into.
    remote_path_from: Path of the file within bucket_from.
    remote_path_to: Destination path within bucket_to.
  """
  source = 'gs://%s/%s' % (bucket_from, remote_path_from)
  destination = 'gs://%s/%s' % (bucket_to, remote_path_to)
  logging.info('Copying %s to %s' % (source, destination))
  _RunCommand(['cp', source, destination])
+
+
def Delete(bucket, remote_path):
  """Delete gs://bucket/remote_path from Cloud Storage."""
  target = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Deleting %s' % target)
  _RunCommand(['rm', target])
+
+
def Get(bucket, remote_path, local_path):
  """Download gs://bucket/remote_path to local_path under a pseudo file lock."""
  with _PseudoFileLock(local_path):
    _GetLocked(bucket, remote_path, local_path)
+
+
@contextlib.contextmanager
def _PseudoFileLock(base_path):
  """Context manager guarding |base_path| against concurrent access.

  Uses a '<base_path>.pseudo_lock' marker file as a cross-platform (but
  admittedly racy) mutual-exclusion mechanism, and additionally flock()s
  the marker on platforms where fcntl is available (the import at the top
  of the file sets fcntl to None where it is not).
  """
  pseudo_lock_path = '%s.pseudo_lock' % base_path
  _CreateDirectoryIfNecessary(os.path.dirname(pseudo_lock_path))
  # This is somewhat of a racy hack because we don't have a good
  # cross-platform file lock. If we get one, this should be refactored
  # to use it.
  while os.path.exists(pseudo_lock_path):
    time.sleep(0.1)
  fd = os.open(pseudo_lock_path, os.O_RDONLY | os.O_CREAT)
  if fcntl:
    # Real exclusive lock where supported, closing the window the
    # exists/sleep poll above leaves open.
    fcntl.flock(fd, fcntl.LOCK_EX)
  try:
    yield
  finally:
    if fcntl:
      fcntl.flock(fd, fcntl.LOCK_UN)
    try:
      os.close(fd)
      os.remove(pseudo_lock_path)
    except OSError:
      # We don't care if the pseudo-lock gets removed elsewhere before we have
      # a chance to do so.
      pass
+
+
+def _CreateDirectoryIfNecessary(directory):
+ if not os.path.exists(directory):
+ os.makedirs(directory)
+
+
def _GetLocked(bucket, remote_path, local_path):
  """Download gs://bucket/remote_path to local_path.

  Caller must already hold the pseudo file lock for local_path. The download
  goes to a temporary file in the destination directory first, so a partial
  download never replaces an existing local copy.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Downloading %s to %s' % (url, local_path))
  _CreateDirectoryIfNecessary(os.path.dirname(local_path))
  temp_file = tempfile.NamedTemporaryFile(dir=os.path.dirname(local_path),
                                          delete=False)
  with temp_file:
    try:
      # Windows won't download to an open file.
      temp_file.close()
      try:
        _RunCommand(['cp', url, temp_file.name])
      except ServerError:
        # One retry on a transient 500 from the server.
        logging.info('Cloud Storage server error, retrying download')
        _RunCommand(['cp', url, temp_file.name])
      shutil.move(temp_file.name, local_path)
    finally:
      if os.path.exists(temp_file.name):
        os.remove(temp_file.name)
+
+
def Insert(bucket, remote_path, local_path, publicly_readable=False):
  """Upload the file at |local_path| to cloud storage.

  Args:
    bucket: the google cloud storage bucket name.
    remote_path: the remote file path in |bucket|.
    local_path: path of the local file to be uploaded.
    publicly_readable: whether the uploaded file has publicly readable
      permission.

  Returns:
    The url where the file is uploaded to.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  args = ['cp']
  extra_info = ''
  if publicly_readable:
    # gsutil applies the canned ACL at upload time.
    args.extend(['-a', 'public-read'])
    extra_info = ' (publicly readable)'
  args.extend([local_path, url])
  logging.info('Uploading %s to %s%s' % (local_path, url, extra_info))
  _RunCommand(args)
  return ('https://console.developers.google.com/m/cloudstorage/b/%s/o/%s'
          % (bucket, remote_path))
+
+
def GetIfHashChanged(cs_path, download_path, bucket, file_hash):
  """Download gs://bucket/cs_path to download_path unless it is up to date.

  The download is skipped when download_path already exists and its SHA1
  digest equals file_hash.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _PseudoFileLock(download_path):
    already_current = (os.path.exists(download_path) and
                       CalculateHash(download_path) == file_hash)
    if already_current:
      return False
    _GetLocked(bucket, cs_path, download_path)
    return True
+
+
def GetIfChanged(file_path, bucket):
  """Download file_path from |bucket| when the local copy is stale or absent.

  The expected SHA1 is read from '<file_path>.sha1'; without that hash file
  nothing is downloaded. The hash itself is used as the remote object name.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _PseudoFileLock(file_path):
    hash_path = file_path + '.sha1'
    if not os.path.exists(hash_path):
      logging.warning('Hash file not found: %s' % hash_path)
      return False
    expected_hash = ReadHash(hash_path)
    already_current = (os.path.exists(file_path) and
                       CalculateHash(file_path) == expected_hash)
    if already_current:
      return False
    _GetLocked(bucket, expected_hash, file_path)
    return True
+
+
def GetFilesInDirectoryIfChanged(directory, bucket):
  """Recursively sync every .sha1-tracked file under |directory|.

  Walks the tree for .sha1 hash files and downloads the corresponding
  binaries from |bucket| whenever the local copy is missing or stale.

  Raises:
    ValueError: if |directory| is not a directory, or is the filesystem root.
  """
  if not os.path.isdir(directory):
    raise ValueError('Must provide a valid directory.')
  # Don't allow the root directory to be a serving_dir.
  if directory == os.path.abspath(os.sep):
    raise ValueError('Trying to serve root directory from HTTP server.')
  for dirpath, _, filenames in os.walk(directory):
    for filename in filenames:
      stem, extension = os.path.splitext(os.path.join(dirpath, filename))
      if extension == '.sha1':
        GetIfChanged(stem, bucket)
+
def CalculateHash(file_path):
  """Return the SHA1 hex digest of the file at |file_path|."""
  digest = hashlib.sha1()
  with open(file_path, 'rb') as stream:
    # Stream in 1 MiB chunks so large files need not fit in memory.
    for chunk in iter(lambda: stream.read(1024 * 1024), b''):
      digest.update(chunk)
  return digest.hexdigest()
+
+
def ReadHash(hash_path):
  """Return the hash stored in |hash_path|, with trailing whitespace removed."""
  with open(hash_path, 'rb') as hash_file:
    # A sha1 hex digest is 40 bytes; 1024 comfortably bounds the read.
    contents = hash_file.read(1024)
  return contents.rstrip()
« no previous file with comments | « tools/telemetry/catapult_base/bin/run_tests ('k') | tools/telemetry/catapult_base/dependency_manager/OWNERS » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698