| Index: slave/skia_slave_scripts/utils/old_gs_utils.py
|
| diff --git a/slave/skia_slave_scripts/utils/old_gs_utils.py b/slave/skia_slave_scripts/utils/old_gs_utils.py
|
| deleted file mode 100644
|
| index 903426c1b8b53cb58114c488d3296c520d15ac34..0000000000000000000000000000000000000000
|
| --- a/slave/skia_slave_scripts/utils/old_gs_utils.py
|
| +++ /dev/null
|
| @@ -1,458 +0,0 @@
|
| -#!/usr/bin/env python
|
| -# Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
| -# Use of this source code is governed by a BSD-style license that can be
|
| -# found in the LICENSE file.
|
| -
|
| -"""This module contains utilities related to Google Storage manipulations.
|
| -
|
| -TODO(epoger): Replace this old gs_utils.py with a new one, within the common
|
| -repo, that uses google-api-python-client rather than the gsutil tool.
|
| -See http://skbug.com/2618 ('buildbot code: use google-api-python-client instead
|
| -of gsutil tool')
|
| -"""
|
| -
|
| -import hashlib
|
| -import os
|
| -import posixpath
|
| -import re
|
| -import shutil
|
| -import tempfile
|
| -import time
|
| -
|
| -from py.utils import shell_utils
|
| -from slave import slave_utils
|
| -
|
| -import file_utils
|
| -
|
| -
|
| -DEFAULT_DEST_GSBASE = 'gs://chromium-skia-gm'
|
| -TIMESTAMP_STARTED_FILENAME = 'TIMESTAMP_LAST_UPLOAD_STARTED'
|
| -TIMESTAMP_COMPLETED_FILENAME = 'TIMESTAMP_LAST_UPLOAD_COMPLETED'
|
| -LAST_REBASELINED_BY_FILENAME = 'LAST_REBASELINED_BY'
|
| -
|
| -FILES_CHUNK = 500
|
| -BUFSIZE = 64 * 1024
|
| -
|
| -ETAG_REGEX = re.compile(r'ETag:\s*(\S+)')
|
| -
|
| -
|
| -def delete_storage_object(object_name):
|
| - """Delete an object on Google Storage."""
|
| - gsutil = slave_utils.GSUtilSetup()
|
| - command = [gsutil]
|
| - command.extend(['rm', '-R', object_name])
|
| - print 'Running command: %s' % command
|
| - shell_utils.run(command)
|
| -
|
| -
|
| -def upload_file(local_src_path, remote_dest_path, gs_acl='private',
|
| - http_header_lines=None, only_if_modified=False):
|
| - """Upload contents of a local file to Google Storage.
|
| -
|
| - params:
|
| - local_src_path: path to file on local disk
|
| - remote_dest_path: GS URL (gs://BUCKETNAME/PATH)
|
| - gs_acl: which predefined ACL to apply to the file on Google Storage; see
|
| - https://developers.google.com/storage/docs/accesscontrol#extension
|
| - http_header_lines: a list of HTTP header strings to add, if any
|
| - only_if_modified: if True, only upload the file if it would actually change
|
| - the content on Google Storage (uploads the file if remote_dest_path
|
| - does not exist, or if it exists but has different contents than
|
| - local_src_path). Note that this may take longer than just uploading the
|
| - file without checking first, due to extra round-trips!
|
| -
|
| - TODO(epoger): Consider adding a do_compress parameter that would compress
|
| - the file using gzip before upload, and add a "Content-Encoding:gzip" header
|
| - so that HTTP downloads of the file would be unzipped automatically.
|
| - See https://developers.google.com/storage/docs/gsutil/addlhelp/
|
| - WorkingWithObjectMetadata#content-encoding
|
| - """
|
| - gsutil = slave_utils.GSUtilSetup()
|
| -
|
| - if only_if_modified:
|
| - # Return early if we don't need to do the upload.
|
| - command = [gsutil, 'ls', '-L', remote_dest_path]
|
| - try:
|
| - ls_output = shell_utils.run(command)
|
| - matches = ETAG_REGEX.search(ls_output)
|
| - if matches:
|
| - # TODO(epoger): In my testing, this has always returned an MD5 hash
|
| - # that is comparable to local_md5 below. But from my reading of
|
| - # https://developers.google.com/storage/docs/hashes-etags , this is
|
| - # not something we can always rely on ("composite objects don't support
|
| - # MD5 hashes"; I'm not sure if we ever encounter composite objects,
|
| - # though). It would be good for us to find a more reliable hash, but
|
| - # I haven't found a way to get one out of gsutil yet.
|
| - #
|
| - # For now: if the remote_md5 is not found, or is computed in
|
| - # such a way that is different from local_md5, then we will re-upload
|
| - # the file even if it did not change.
|
| - remote_md5 = matches.group(1)
|
| - hasher = hashlib.md5()
|
| - with open(local_src_path, 'rb') as filereader:
|
| - while True:
|
| - data = filereader.read(BUFSIZE)
|
| - if not data:
|
| - break
|
| - hasher.update(data)
|
| - local_md5 = hasher.hexdigest()
|
| - if local_md5 == remote_md5:
|
| - print ('local_src_path %s and remote_dest_path %s have same hash %s' %
|
| - (local_src_path, remote_dest_path, local_md5))
|
| - return
|
| - except shell_utils.CommandFailedException:
|
| - # remote_dest_path probably does not exist. Go ahead and do the upload.
|
| - pass
|
| -
|
| - command = [gsutil]
|
| - if http_header_lines:
|
| - for http_header_line in http_header_lines:
|
| - command.extend(['-h', http_header_line])
|
| - command.extend(['cp', '-a', gs_acl, local_src_path, remote_dest_path])
|
| - print 'Running command: %s' % command
|
| - shell_utils.run(command)
|
| -
|
| -
|
| -def upload_dir_contents(local_src_dir, remote_dest_dir, gs_acl='private',
|
| - http_header_lines=None):
|
| - """Upload contents of a local directory to Google Storage.
|
| -
|
| - params:
|
| - local_src_dir: directory on local disk to upload contents of
|
| - remote_dest_dir: GS URL (gs://BUCKETNAME/PATH)
|
| - gs_acl: which predefined ACL to apply to the files on Google Storage; see
|
| - https://developers.google.com/storage/docs/accesscontrol#extension
|
| - http_header_lines: a list of HTTP header strings to add, if any
|
| -
|
| - The copy operates as a "merge with overwrite": any files in src_dir will be
|
| - "overlaid" on top of the existing content in dest_dir. Existing files with
|
| - the same names will be overwritten.
|
| -
|
| - We upload each file as a separate call to gsutil. This takes longer than
|
| - calling "gsutil -m cp -R <source> <dest>", which can perform the uploads in
|
| - parallel... but in http://skbug.com/2618 ('The Case of the Missing
|
| - Mandrills') we figured out that was silently failing in some cases!
|
| -
|
| - TODO(epoger): Use the google-api-python-client API, like we do in
|
| - https://skia.googlesource.com/skia/+/master/tools/pyutils/gs_utils.py ,
|
| - rather than calling out to the gsutil tool. See http://skbug.com/2618
|
| -
|
| - TODO(epoger): Upload multiple files simultaneously to reduce latency.
|
| -
|
| - TODO(epoger): Add a "noclobber" mode that will not upload any files would
|
| - overwrite existing files in Google Storage.
|
| -
|
| - TODO(epoger): Consider adding a do_compress parameter that would compress
|
| - the file using gzip before upload, and add a "Content-Encoding:gzip" header
|
| - so that HTTP downloads of the file would be unzipped automatically.
|
| - See https://developers.google.com/storage/docs/gsutil/addlhelp/
|
| - WorkingWithObjectMetadata#content-encoding
|
| - """
|
| - gsutil = slave_utils.GSUtilSetup()
|
| - command = [gsutil]
|
| - if http_header_lines:
|
| - for http_header_line in http_header_lines:
|
| - command.extend(['-h', http_header_line])
|
| - command.extend(['cp', '-a', gs_acl])
|
| -
|
| - abs_local_src_dir = os.path.abspath(local_src_dir)
|
| - for (abs_src_dirpath, _, filenames) in os.walk(abs_local_src_dir):
|
| - if abs_src_dirpath == abs_local_src_dir:
|
| - # This file is within local_src_dir; no need to add subdirs to
|
| - # abs_dest_dirpath.
|
| - abs_dest_dirpath = remote_dest_dir
|
| - else:
|
| - # This file is within a subdir, so add subdirs to abs_dest_dirpath.
|
| - abs_dest_dirpath = posixpath.join(
|
| - remote_dest_dir,
|
| - _convert_to_posixpath(
|
| - os.path.relpath(abs_src_dirpath, abs_local_src_dir)))
|
| - for filename in sorted(filenames):
|
| - abs_src_filepath = os.path.join(abs_src_dirpath, filename)
|
| - abs_dest_filepath = posixpath.join(abs_dest_dirpath, filename)
|
| - shell_utils.run(command + [abs_src_filepath, abs_dest_filepath])
|
| -
|
| -
|
| -def download_dir_contents(remote_src_dir, local_dest_dir, multi=True):
|
| - """Download contents of a Google Storage directory to local disk.
|
| -
|
| - params:
|
| - remote_src_dir: GS URL (gs://BUCKETNAME/PATH)
|
| - local_dest_dir: directory on local disk to write the contents into
|
| - multi: boolean; whether to perform the copy in multithreaded mode.
|
| -
|
| - The copy operates as a "merge with overwrite": any files in src_dir will be
|
| - "overlaid" on top of the existing content in dest_dir. Existing files with
|
| - the same names will be overwritten.
|
| - """
|
| - gsutil = slave_utils.GSUtilSetup()
|
| - command = [gsutil]
|
| - if multi:
|
| - command.append('-m')
|
| - command.extend(['cp', '-R', remote_src_dir, local_dest_dir])
|
| - print 'Running command: %s' % command
|
| - shell_utils.run(command)
|
| -
|
| -
|
| -def copy_dir_contents(remote_src_dir, remote_dest_dir, gs_acl='private',
|
| - http_header_lines=None):
|
| - """Copy contents of one Google Storage directory to another.
|
| -
|
| - params:
|
| - remote_src_dir: source GS URL (gs://BUCKETNAME/PATH)
|
| - remote_dest_dir: dest GS URL (gs://BUCKETNAME/PATH)
|
| - gs_acl: which predefined ACL to apply to the new files; see
|
| - https://developers.google.com/storage/docs/accesscontrol#extension
|
| - http_header_lines: a list of HTTP header strings to add, if any
|
| -
|
| - The copy operates as a "merge with overwrite": any files in src_dir will be
|
| - "overlaid" on top of the existing content in dest_dir. Existing files with
|
| - the same names will be overwritten.
|
| -
|
| - Performs the copy in multithreaded mode, in case there are a large number of
|
| - files.
|
| - """
|
| - gsutil = slave_utils.GSUtilSetup()
|
| - command = [gsutil, '-m']
|
| - if http_header_lines:
|
| - for http_header_line in http_header_lines:
|
| - command.extend(['-h', http_header_line])
|
| - command.extend(['cp', '-a', gs_acl, '-R', remote_src_dir, remote_dest_dir])
|
| - print 'Running command: %s' % command
|
| - shell_utils.run(command)
|
| -
|
| -
|
| -def move_storage_directory(src_dir, dest_dir):
|
| - """Move a directory on Google Storage."""
|
| - gsutil = slave_utils.GSUtilSetup()
|
| - command = [gsutil]
|
| - command.extend(['mv', '-p', src_dir, dest_dir])
|
| - print 'Running command: %s' % command
|
| - shell_utils.run(command)
|
| -
|
| -
|
| -def list_storage_directory(dest_gsbase, subdir):
|
| - """List the contents of the specified Storage directory."""
|
| - gsbase_subdir = posixpath.join(dest_gsbase, subdir)
|
| - status, output_gsutil_ls = slave_utils.GSUtilListBucket(gsbase_subdir, [])
|
| - if status != 0:
|
| - raise Exception(
|
| - 'Could not list contents of %s in Google Storage!' % gsbase_subdir)
|
| -
|
| - gs_files = []
|
| - for line in set(output_gsutil_ls.splitlines()):
|
| - # Ignore lines with warnings and status messages.
|
| - if line and line.startswith(gsbase_subdir) and line != gsbase_subdir:
|
| - gs_files.append(line)
|
| - return gs_files
|
| -
|
| -
|
| -def does_storage_object_exist(object_name):
|
| - """Checks if an object exists on Google Storage.
|
| -
|
| - Returns True if it exists else returns False.
|
| - """
|
| - gsutil = slave_utils.GSUtilSetup()
|
| - command = [gsutil]
|
| - command.extend(['ls', object_name])
|
| - print 'Running command: %s' % command
|
| - try:
|
| - shell_utils.run(command)
|
| - return True
|
| - except shell_utils.CommandFailedException:
|
| - return False
|
| -
|
| -
|
| -def download_directory_contents_if_changed(gs_base, gs_relative_dir, local_dir):
|
| - """Compares the TIMESTAMP_LAST_UPLOAD_COMPLETED and downloads if different.
|
| -
|
| - The goal of download_directory_contents_if_changed and
|
| - upload_directory_contents_if_changed is to attempt to replicate directory
|
| - level rsync functionality to the Google Storage directories we care about.
|
| - """
|
| - if _are_timestamps_equal(gs_base, gs_relative_dir, local_dir):
|
| - print '\n\n=======Local directory is current=======\n\n'
|
| - else:
|
| - file_utils.create_clean_local_dir(local_dir)
|
| - gs_source = posixpath.join(gs_base, gs_relative_dir, '*')
|
| - slave_utils.GSUtilDownloadFile(src=gs_source, dst=local_dir)
|
| - if not _are_timestamps_equal(gs_base, gs_relative_dir, local_dir):
|
| - raise Exception('Failed to download from GS: %s' % gs_source)
|
| -
|
| -
|
| -def _get_chunks(seq, n):
|
| - """Yield successive n-sized chunks from the specified sequence."""
|
| - for i in xrange(0, len(seq), n):
|
| - yield seq[i:i+n]
|
| -
|
| -
|
| -def delete_directory_contents(gs_base, gs_relative_dir, files_to_delete):
|
| - """Deletes the specified files from the Google Storage Directory.
|
| -
|
| - Args:
|
| - gs_base: str - The Google Storage base. Eg: gs://rmistry.
|
| - gs_relative_dir: str - Relative directory to the Google Storage base.
|
| - files_to_delete: Files that should be deleted from the Google Storage
|
| - directory. The files are deleted one at a time. If files_to_delete is
|
| - None or empty then all directory contents are deleted.
|
| - """
|
| - gs_dest = posixpath.join(gs_base, gs_relative_dir)
|
| - if files_to_delete:
|
| - for file_to_delete in files_to_delete:
|
| - delete_storage_object(object_name=posixpath.join(gs_dest, file_to_delete))
|
| - else:
|
| - delete_storage_object(gs_dest)
|
| -
|
| -
|
| -def upload_directory_contents_if_changed(gs_base, gs_relative_dir, gs_acl,
|
| - local_dir, force_upload=False,
|
| - upload_chunks=False,
|
| - files_to_upload=None):
|
| - """Compares the TIMESTAMP_LAST_UPLOAD_COMPLETED and uploads if different.
|
| -
|
| - Args:
|
| - gs_base: str - The Google Storage base. Eg: gs://rmistry.
|
| - gs_relative_dir: str - Relative directory to the Google Storage base.
|
| - gs_acl: str - ACL to use when uploading to Google Storage.
|
| - local_dir: str - The local directory to upload.
|
| - force_upload: bool - Whether upload should be done regardless of timestamps
|
| - matching or not.
|
| - upload_chunks: bool - Whether upload should be done in chunks or in a single
|
| - command.
|
| - files_to_upload: str seq - Specific files that should be uploaded, if not
|
| - specified then all files in local_dir are uploaded. If upload_chunks is
|
| - True then files will be uploaded in chunks else they will be uploaded
|
| - one at a time. The Google Storage directory is not cleaned before upload
|
| - if files_to_upload is specified.
|
| -
|
| - The goal of download_directory_contents_if_changed and
|
| - upload_directory_contents_if_changed is to attempt to replicate directory
|
| - level rsync functionality to the Google Storage directories we care about.
|
| -
|
| - Returns True if contents were uploaded, else returns False.
|
| - """
|
| - if not force_upload and _are_timestamps_equal(gs_base, gs_relative_dir,
|
| - local_dir):
|
| - print '\n\n=======Local directory is current=======\n\n'
|
| - return False
|
| - else:
|
| - local_src = os.path.join(local_dir, '*')
|
| - gs_dest = posixpath.join(gs_base, gs_relative_dir)
|
| - timestamp_value = time.time()
|
| -
|
| - if not files_to_upload:
|
| - print '\n\n=======Delete Storage directory before uploading=======\n\n'
|
| - delete_storage_object(gs_dest)
|
| -
|
| - print '\n\n=======Writing new TIMESTAMP_LAST_UPLOAD_STARTED=======\n\n'
|
| - write_timestamp_file(
|
| - timestamp_file_name=TIMESTAMP_STARTED_FILENAME,
|
| - timestamp_value=timestamp_value, gs_base=gs_base,
|
| - gs_relative_dir=gs_relative_dir, local_dir=local_dir, gs_acl=gs_acl)
|
| -
|
| - if upload_chunks:
|
| - if files_to_upload:
|
| - local_files = [
|
| - os.path.join(local_dir, local_file)
|
| - for local_file in files_to_upload]
|
| - else:
|
| - local_files = [
|
| - os.path.join(local_dir, local_file)
|
| - for local_file in os.listdir(local_dir)]
|
| - for files_chunk in _get_chunks(local_files, FILES_CHUNK):
|
| - gsutil = slave_utils.GSUtilSetup()
|
| - command = [gsutil, 'cp'] + files_chunk + [gs_dest]
|
| - try:
|
| - shell_utils.run(command)
|
| - except shell_utils.CommandFailedException:
|
| - raise Exception(
|
| - 'Could not upload the chunk to Google Storage! The chunk: %s'
|
| - % files_chunk)
|
| - else:
|
| - if files_to_upload:
|
| - for file_to_upload in files_to_upload:
|
| - if slave_utils.GSUtilDownloadFile(
|
| - src=os.path.join(local_dir, file_to_upload), dst=gs_dest) != 0:
|
| - raise Exception(
|
| - 'Could not upload %s to Google Storage!' % file_to_upload)
|
| - else:
|
| - if slave_utils.GSUtilDownloadFile(src=local_src, dst=gs_dest) != 0:
|
| - raise Exception('Could not upload %s to Google Storage!' % local_src)
|
| -
|
| - print '\n\n=======Writing new TIMESTAMP_LAST_UPLOAD_COMPLETED=======\n\n'
|
| - write_timestamp_file(
|
| - timestamp_file_name=TIMESTAMP_COMPLETED_FILENAME,
|
| - timestamp_value=timestamp_value, gs_base=gs_base,
|
| - gs_relative_dir=gs_relative_dir, local_dir=local_dir, gs_acl=gs_acl)
|
| - return True
|
| -
|
| -
|
| -def _are_timestamps_equal(gs_base, gs_relative_dir, local_dir):
|
| - """Compares the local TIMESTAMP with the TIMESTAMP from Google Storage."""
|
| -
|
| - local_timestamp_file = os.path.join(local_dir, TIMESTAMP_COMPLETED_FILENAME)
|
| - # Make sure that the local TIMESTAMP file exists.
|
| - if not os.path.exists(local_timestamp_file):
|
| - return False
|
| -
|
| - # Get the timestamp file from Google Storage.
|
| - src = posixpath.join(gs_base, gs_relative_dir, TIMESTAMP_COMPLETED_FILENAME)
|
| - temp_file = tempfile.mkstemp()[1]
|
| - slave_utils.GSUtilDownloadFile(src=src, dst=temp_file)
|
| -
|
| - local_file_obj = open(local_timestamp_file, 'r')
|
| - storage_file_obj = open(temp_file, 'r')
|
| - try:
|
| - local_timestamp = local_file_obj.read().strip()
|
| - storage_timestamp = storage_file_obj.read().strip()
|
| - return local_timestamp == storage_timestamp
|
| - finally:
|
| - local_file_obj.close()
|
| - storage_file_obj.close()
|
| -
|
| -
|
| -def read_timestamp_file(timestamp_file_name, gs_base, gs_relative_dir):
|
| - """Reads the specified TIMESTAMP file from the specified GS dir.
|
| -
|
| - Returns 0 if the file is empty or does not exist.
|
| - """
|
| - src = posixpath.join(gs_base, gs_relative_dir, timestamp_file_name)
|
| - temp_file = tempfile.mkstemp()[1]
|
| - slave_utils.GSUtilDownloadFile(src=src, dst=temp_file)
|
| -
|
| - storage_file_obj = open(temp_file, 'r')
|
| - try:
|
| - timestamp_value = storage_file_obj.read().strip()
|
| - return timestamp_value if timestamp_value else "0"
|
| - finally:
|
| - storage_file_obj.close()
|
| -
|
| -
|
| -def write_timestamp_file(timestamp_file_name, timestamp_value, gs_base=None,
|
| - gs_relative_dir=None, gs_acl=None, local_dir=None):
|
| - """Adds a timestamp file to a Google Storage and/or a Local Directory.
|
| -
|
| - If gs_base, gs_relative_dir and gs_acl are provided then the timestamp is
|
| - written to Google Storage. If local_dir is provided then the timestamp is
|
| - written to a local directory.
|
| - """
|
| - timestamp_file = os.path.join(tempfile.gettempdir(), timestamp_file_name)
|
| - f = open(timestamp_file, 'w')
|
| - try:
|
| - f.write(str(timestamp_value))
|
| - finally:
|
| - f.close()
|
| - if local_dir:
|
| - shutil.copyfile(timestamp_file,
|
| - os.path.join(local_dir, timestamp_file_name))
|
| - if gs_base and gs_relative_dir and gs_acl:
|
| - slave_utils.GSUtilCopyFile(filename=timestamp_file, gs_base=gs_base,
|
| - subdir=gs_relative_dir, gs_acl=gs_acl)
|
| -
|
| -
|
| -def _convert_to_posixpath(localpath):
|
| - """Convert localpath to posix format."""
|
| - if os.sep == '/':
|
| - return localpath
|
| - else:
|
| - return '/'.join(localpath.split(os.sep))
|
|
|