Index: tools/bisect_repackage/bisect_repackage_linux.py |
diff --git a/tools/bisect_repackage/bisect_repackage_linux.py b/tools/bisect_repackage/bisect_repackage_linux.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..4ec67e91f1bfc3cccae07b91fb8a5bf87ee9b749 |
--- /dev/null |
+++ b/tools/bisect_repackage/bisect_repackage_linux.py |
@@ -0,0 +1,393 @@ |
+# Copyright (c) 2016 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+""" Bisect repackage tool for Linux |
+ |
+This script repacakges chrome builds for manual bisect script. |
+""" |
+ |
+# Declares the files required to run the manual bisect script on Chrome Linux |
+# builds in perf. Binary files that should be stripped to reduce the zip file |
+# size are declared below. The file list was obtained from the local Chrome |
+# executable path on Linux. (This can be retrieved by typing 'chrome://version' |
+# in Chrome and following the executable path.) The list needs to be updated |
+# if future Chrome versions require additional files. |
+FILES_TO_ARCHIVE = [ |
+ 'chrome', |
+ 'chrome_100_percent.pak', |
+ 'chrome_200_percent.pak', |
+ 'default_apps', |
+ 'icudtl.dat', |
+ 'libwidevinecdm.so', |
+ 'locales', |
+ 'nacl_helper', |
+ 'nacl_helper_bootstrap', |
+ 'nacl_irt_x86_64.nexe', |
+ 'natives_blob.bin', |
+ 'PepperFlash', |
+ 'product_logo_48.png', |
+ 'resources.pak', |
+ 'snapshot_blob.bin', |
+ 'xdg-mime', |
+ 'xdg-settings', |
+] |
+ |
+# Declares what files should be stripped of symbols to reduce the archive size. |
+STRIP_LIST = [ |
+ 'chrome', |
+ 'nacl_helper' |
+] |
+ |
+# API used to convert a git hash to a commit position number. |
+CHROMIUM_GITHASH_TO_SVN_URL = ( |
+ 'https://cr-rev.appspot.com/_ah/api/crrev/v1/commit/%s') |
+ |
+BUILDER_NAME = 'Linux Builder' |
+ARCHIVE_PREFIX = 'full-build-linux' |
+REVISION_MAP_FILE = 'revision_map.json' |
+ |
+################################################################################ |
+import os |
+import bisect_repackage_utils |
+import zipfile |
+import zlib |
+import json |
+import sys |
+import optparse |
+import re |
+import urllib |
+import threading |
+from multiprocessing import Pool |
+from functools import partial |
+ |
+# Declares where files should be stored temporarily for staging. |
+# TODO(miimnk): use tempfile.mkdtemp to make temporary folders. |
+STAGING_DIR = os.path.join('.', 'a', 'tmp_forward') |
dimu
2016/08/19 04:14:05
is there any cleanup for the staging dir?
miimnk
2016/08/19 20:29:43
Erased the two global variables and used 'tempfile
|
+STAGING_DIR_BACKWARD = os.path.join('.', 'a', 'tmp_backward') |
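The review exchange above notes that these fixed-path staging globals were later replaced with tempfile. A minimal sketch of that approach, assuming tempfile.mkdtemp and shutil.rmtree; the variable names are illustrative and this is not the code that actually landed:

import shutil
import tempfile

# Create throwaway staging directories and guarantee cleanup, even if
# repackaging fails part-way through.
staging_dir = tempfile.mkdtemp(prefix='tmp_forward_')
staging_dir_backward = tempfile.mkdtemp(prefix='tmp_backward_')
try:
  pass  # Run the forward/backward repackaging jobs here.
finally:
  shutil.rmtree(staging_dir, ignore_errors=True)
  shutil.rmtree(staging_dir_backward, ignore_errors=True)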
+ |
+ |
+class PathContext(object): |
+ """A PathContext is used to carry the information used to construct URLs and |
+ paths when dealing with the storage server and archives.""" |
+ def __init__(self, original_gs_url, repackage_gs_url, |
+ builder_name=BUILDER_NAME, file_prefix=ARCHIVE_PREFIX, |
+ revision_map=REVISION_MAP_FILE): |
+ super(PathContext, self).__init__() |
+ self.original_gs_url = original_gs_url |
+ self.repackage_gs_url = repackage_gs_url |
+ self.builder_name = builder_name |
+ self.file_prefix = file_prefix |
+ self.revision_map = revision_map |
+ |
+ |
+def get_cp_from_hash(git_hash): |
+ """Converts a GitHash to Commit position number""" |
+ json_url = CHROMIUM_GITHASH_TO_SVN_URL % git_hash |
+ response = urllib.urlopen(json_url) |
+ if response.getcode() == 200: |
+ try: |
+ data = json.loads(response.read()) |
+ except ValueError: |
+ print 'ValueError for JSON URL: %s' % json_url |
dimu
2016/08/19 04:14:05
It would be better to use logging instead of print
miimnk
2016/08/19 20:29:43
Done.
|
+ return None |
dimu
2016/08/19 04:14:05
throw exception for failure, and use try/catch in
miimnk
2016/08/19 20:29:43
Done.
|
+ else: |
+    print 'Request failed for JSON URL: %s' % json_url |
+ return None |
+ if 'number' in data: |
+ return data['number'] |
+  print 'Failed to get commit position number for %s' % git_hash |
+ return None |
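The review comments above ask for logging rather than print, and for raising an exception rather than returning None. A sketch of what get_cp_from_hash could look like under those suggestions; this is an assumption about the follow-up patch, not its actual content:

import json
import logging
import urllib

def get_cp_from_hash(git_hash):
  """Converts a git hash to a commit position number, raising on failure."""
  json_url = CHROMIUM_GITHASH_TO_SVN_URL % git_hash
  response = urllib.urlopen(json_url)
  if response.getcode() != 200:
    logging.error('Request to %s failed with code %s.',
                  json_url, response.getcode())
    raise ValueError('Could not reach crrev for hash %s' % git_hash)
  try:
    data = json.loads(response.read())
  except ValueError:
    logging.error('Invalid JSON returned by %s.', json_url)
    raise
  if 'number' not in data:
    raise ValueError('No commit position found for hash %s' % git_hash)
  return data['number']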
+ |
+def create_cp_from_hash_map(hash_list): |
+ """Creates a dictionary that maps from Commit positio number |
+ to corresponding GitHash""" |
+ hash_map = {} |
+ count = 0 |
+ for git_hash in hash_list: |
+ cp_num = get_cp_from_hash(git_hash) |
+ hash_map[cp_num] = git_hash |
+ print "Downloaded %s / %s git hash" %(count, len(hash_list)) |
+ count += 1 |
+ return hash_map |
+ |
+def isProperHash(regex_match): |
dimu
2016/08/19 04:14:05
move it to utils
dimu
2016/08/19 04:14:05
isGitCommitHash
miimnk
2016/08/19 20:29:42
Done.
miimnk
2016/08/19 20:29:43
Done.
|
+ """Checks if match is correct SHA1 hash""" |
+ if len(regex_match) == 40: return True |
dimu
2016/08/19 04:14:05
use re.match(r"^[0-9,A-F]{40}$", regex_match.upper
miimnk
2016/08/19 20:29:43
Done.
|
+ return False |
+ |
+def isProperRevision(regex_match): |
dimu
2016/08/19 04:14:05
move this to utils
miimnk
2016/08/19 20:29:43
Done.
|
+ """Checks if match is correct revision(Cp number) format""" |
+ if len(regex_match) == 6: return True |
dimu
2016/08/19 04:14:05
also use regex
miimnk
2016/08/19 20:29:43
Done.
|
+ return False |
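As suggested in the review comments above, both checks can be tightened with regular expressions (and moved into bisect_repackage_utils). A possible shape, with illustrative names; not necessarily what the revised patch uses:

import re

def IsGitCommitHash(regex_match):
  """Returns True if the match is a well-formed 40-character SHA1 hash."""
  return bool(re.match(r'^[0-9a-fA-F]{40}$', regex_match))

def IsCommitPosition(regex_match):
  """Returns True if the match looks like a commit position number."""
  return bool(re.match(r'^[0-9]{6}$', regex_match))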
+ |
+def get_list_of_suffix(bucket_address, prefix, filter_function): |
+ """Gets the list of suffixes in files in a google storage bucket. |
+ For example, a google storage bucket containing one file |
+ 'full-build-linux_20983' will return ['20983'] if prefix is |
+ provided as 'full-build-linux'. Google Storage bucket |
+ containing multiple files will return multiple suffixes. """ |
+ file_list = bisect_repackage_utils.GSutilList(bucket_address) |
+ suffix_list = [] |
+ extract_suffix = '.*?%s_(.*?)\.zip' %(prefix) |
+ for file in file_list: |
+ match = re.match(extract_suffix, file) |
+ if match and filter_function(match.groups()[0]): |
+ suffix_list.append(match.groups()[0]) |
+ return suffix_list |
+ |
+def download_build(cp_num, revision_map, zip_file_name, context): |
+ """ Download a single build corresponding to the cp_num and context.""" |
+ file_url = '%s/%s/%s_%s.zip' %(context.original_gs_url, context.builder_name, |
+ context.file_prefix, revision_map[cp_num]) |
+ bisect_repackage_utils.GSUtilDownloadFile(file_url, zip_file_name) |
+ |
+def upload_build(zip_file, context): |
+ """Uploads a single build in zip_file to the repackage_gs_url in context.""" |
+ gs_base_url = '%s/%s' %(context.repackage_gs_url, context.builder_name) |
+ upload_url = gs_base_url + '/' |
+ bisect_repackage_utils.GSUtilCopy(zip_file, upload_url) |
+ |
+def download_revision_map(context): |
+ """Downloads the revision map in original_gs_url in context.""" |
+ gs_base_url = '%s/%s' %(context.repackage_gs_url, context.builder_name) |
+ download_url = gs_base_url + '/' + context.revision_map |
+ try: |
+ bisect_repackage_utils.GSUtilDownloadFile(download_url, |
+ context.revision_map) |
+  except Exception: |
+    raise ValueError('Revision map does not exist. Re-run the program with ' |
+                     'the -c option to upload a revision map to Google Storage.') |
+ |
+def get_revision_map(context): |
+ """Downloads and returns the revision map in repackage_gs_url in context.""" |
+ bisect_repackage_utils.RemoveFile(context.revision_map) |
+ download_revision_map(context) |
+ with open(context.revision_map, 'r') as f: |
dimu
2016/08/19 04:14:05
nit: better naming: revision_file
miimnk
2016/08/19 20:29:43
Done.
|
+ revision_map = json.load(f) |
+ bisect_repackage_utils.RemoveFile(context.revision_map) |
+ return revision_map |
+ |
+def upload_revision_map(revision_map, context): |
+ """Upload the given revision_map to the repackage_gs_url in context.""" |
+ with open(context.revision_map, 'w') as fp: |
+ json.dump(revision_map, fp) |
+ gs_base_url = '%s/%s' %(context.repackage_gs_url, context.builder_name) |
+ upload_url = gs_base_url + '/' |
+ bisect_repackage_utils.GSUtilCopy(context.revision_map, upload_url) |
+ bisect_repackage_utils.RemoveFile(context.revision_map) |
+ |
+def create_upload_revision_map(context): |
+ """ Creates and uploads a dictionary that maps from GitHash to CP number.""" |
+ gs_base_url = '%s/%s' %(context.original_gs_url, context.builder_name) |
+ hash_list = get_list_of_suffix(gs_base_url, context.file_prefix, isProperHash) |
+ cp_num_to_hash_map = create_cp_from_hash_map(hash_list) |
+ upload_revision_map(cp_num_to_hash_map, context) |
+ |
+def update_upload_revision_map(context): |
+ """ Updates and uploads a dictionary that maps from GitHash to CP number.""" |
+ gs_base_url = '%s/%s' %(context.original_gs_url, context.builder_name) |
+ revision_map = get_revision_map(context) |
+ hash_list = get_list_of_suffix(gs_base_url, context.file_prefix, isProperHash) |
+ hash_list = list(set(hash_list)-set(revision_map.values())) |
+ cp_num_to_hash_map = create_cp_from_hash_map(hash_list) |
+ upload_revision_map(cp_num_to_hash_map, context) |
+ |
+def make_filtered_archive(file_archive, archive_name): |
dimu
2016/08/19 04:14:05
better naming
miimnk
2016/08/19 20:29:43
Done. Changed to 'make_lightweight_archive'.
|
+ """Repackages and strips the archive according to FILES_TO_ARCHIVE and |
+  STRIP_LIST defined at the top.""" |
+ return bisect_repackage_utils.MakeZip('.', |
+ archive_name, |
+ FILES_TO_ARCHIVE, |
+ file_archive, |
+ raise_error=False, |
+ strip_files=STRIP_LIST) |
+ |
+def remove_created_files_and_path(files, paths): |
+ """ Removes all the files and paths passed in.""" |
+ for file in files: |
+ bisect_repackage_utils.RemoveFile(file) |
+ for path in paths: |
+ bisect_repackage_utils.RemovePath(path) |
+ |
+def verify_chrome_run(zip_dir): |
+ """This function executes chrome executable in zip_dir. Raises error if the |
+ execution fails for any reason.""" |
+ try: |
+ command = [os.path.join(zip_dir, 'chrome')] |
+ code = bisect_repackage_utils.RunCommand(command) |
+ if code != 0: |
+ raise ValueError("Executing Chrome Failed: Need to check ") |
+ except Exception, e: |
+ raise ValueError("Executing Chrome Failed: Need to check ") |
+ |
+def repackage_single_revision(revision_map, isForwardArchive, verify_run, |
+ staging_dir, context, cp_num): |
+ """Repackages a single Chrome build for manual bisect.""" |
+ archive_name = '%s_%s' %(context.file_prefix, cp_num) |
+ file_archive = os.path.join(staging_dir, archive_name) |
+ zip_file_name = '%s.zip' %file_archive |
+ |
+ bisect_repackage_utils.RemoveFile(zip_file_name) |
+ download_build(cp_num, revision_map, zip_file_name, context) |
+ extract_dir = os.path.join(staging_dir, archive_name) |
+ bisect_repackage_utils.ExtractZip(zip_file_name, extract_dir) |
+ extracted_folder = os.path.join(extract_dir, context.file_prefix) |
+ |
+ (zip_dir, zip_file) = make_filtered_archive(extracted_folder, archive_name) |
+ |
+ if verify_run: |
+ verify_chrome_run(zip_dir) |
+ |
+ upload_build(zip_file, context) |
+  # Remove temporary files created during the repackaging process. |
+ remove_created_files_and_path([zip_file, zip_file_name], |
+ [zip_dir, extract_dir]) |
+ |
+def repackage_revisions(revisions, revision_map, isForwardArchive, verify_run, |
+ staging_dir, context, quit_event=None, |
+ progress_event=None): |
+ """ Repackages all Chrome builds listed in revisions. This function calls |
+ 'repackage_single_revision' with multithreading pool.'""" |
+ p = Pool(3) |
+ func = partial(repackage_single_revision, revision_map, isForwardArchive, |
+ verify_run, staging_dir, context) |
+ p.imap(func, revisions) |
+ p.close() |
+ p.join() |
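Note that Pool.imap returns a lazy iterator, so results and any worker exceptions only surface when that iterator is consumed. If the intent is simply to block until every build is repackaged and to propagate failures, Pool.map is a simpler fit. A sketch of that variant, keeping the existing repackage_single_revision signature (the helper names below are illustrative, not part of this patch):

from multiprocessing import Pool

def _repackage_one(args):
  """Unpacks one task tuple; kept at module level so it pickles cleanly."""
  revision_map, is_forward, verify_run, staging_dir, context, cp_num = args
  repackage_single_revision(revision_map, is_forward, verify_run,
                            staging_dir, context, cp_num)

def repackage_revisions_blocking(revisions, revision_map, isForwardArchive,
                                 verify_run, staging_dir, context):
  """Repackages all builds in revisions, re-raising any worker exception."""
  pool = Pool(3)
  tasks = [(revision_map, isForwardArchive, verify_run, staging_dir, context,
            cp_num) for cp_num in revisions]
  try:
    pool.map(_repackage_one, tasks)  # Blocks until done; propagates errors.
  finally:
    pool.close()
    pool.join()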
+ |
+def get_uploaded_builds(context): |
+ """Returns already uploaded commit positions numbers in |
+ context.repackage_gs_url""" |
+ gs_base_url = '%s/%s' %(context.repackage_gs_url, context.builder_name) |
+ return get_list_of_suffix(gs_base_url, context.file_prefix, isProperRevision) |
+ |
+def get_revisions_to_package(revision_map, context): |
+ """ Returns revisions that need to be repackaged. The first half of |
+ the revisions will be sorted in ascending order and the second half of |
+ the revisions will be sorted in desending order. |
+ |
+ The first half will be used for repackaging revisions in forward direction, |
+ and the second half will be used for repackaging revisions in backward |
+ direction.""" |
+ already_packaged = get_uploaded_builds(context) |
+ not_already_packaged = list(set(revision_map.keys())-set(already_packaged)) |
+ revisions_to_package = sorted(not_already_packaged) |
+ revisions_to_package = filter(lambda a: a != 'null', revisions_to_package) |
+ |
+ forward_rev = revisions_to_package[:len(revisions_to_package)/2] |
+ backward_rev = sorted(revisions_to_package[len(revisions_to_package)/2:], |
+ reverse=True) |
+ return (forward_rev, backward_rev) |
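For illustration, here is how the split behaves on a hypothetical set of five unpackaged commit positions (the values are invented for the example):

# Commit position keys are strings once the revision map round-trips JSON.
revisions_to_package = sorted(['416100', '416101', '416102',
                               '416103', '416104'])
forward_rev = revisions_to_package[:len(revisions_to_package) / 2]
# forward_rev == ['416100', '416101'], repackaged oldest-first.
backward_rev = sorted(revisions_to_package[len(revisions_to_package) / 2:],
                      reverse=True)
# backward_rev == ['416104', '416103', '416102'], repackaged newest-first.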
+ |
+class RepackageJob(object): |
+ def __init__(self, name, revisions_to_package, revision_map, isForwardArchive, |
+ verify_run, staging_dir, context): |
+ super(RepackageJob, self).__init__() |
+ self.name = name |
+ self.revisions_to_package = revisions_to_package |
+ self.revision_map = revision_map |
+ self.isForwardArchive = isForwardArchive |
+ self.verify_run = verify_run |
+ self.staging_dir = staging_dir |
+ self.context = context |
+ self.quit_event = threading.Event() |
+ self.progress_event = threading.Event() |
+ self.thread = None |
+ |
+ def Start(self): |
+ """Starts the download.""" |
+ fetchargs = (self.revisions_to_package, |
+ self.revision_map, |
+ self.isForwardArchive, |
+ self.verify_run, |
+ self.staging_dir, |
+ self.context, |
+ self.quit_event, |
+ self.progress_event) |
+ self.thread = threading.Thread(target=repackage_revisions, |
+ name=self.name, |
+ args=fetchargs) |
+ self.thread.start() |
+ |
+ def Stop(self): |
+ """Stops the download which must have been started previously.""" |
+ assert self.thread, 'DownloadJob must be started before Stop is called.' |
+ self.quit_event.set() |
+ self.thread.join() |
+ |
+  def WaitFor(self): |
+    """Waits for the repackaging job to complete. The job must have been |
+    started previously.""" |
+    assert self.thread, 'RepackageJob must be started before WaitFor is called.' |
+    self.progress_event.set()  # Display progress of the repackaging. |
+    self.thread.join() |
+ |
+def main(argv): |
+ option_parser = optparse.OptionParser() |
+ |
+ # Verifies that the chrome executable runs |
+  option_parser.add_option('-v', '--verify', |
+                           action='store_true', |
+                           help='Verifies that Chrome executes normally ' |
+                           'without errors') |
+ |
+ # This option will update the revision map. |
+ option_parser.add_option('-u', '--update', |
+ action='store_true', |
+ help='Updates the list of revisions to repackage') |
+ |
+  # This option will create the revision map. |
+ option_parser.add_option('-c', '--create', |
+ action='store_true', |
+ help='Creates the list of revisions to repackage') |
+ |
+ # Original bucket that contains perf builds |
+ option_parser.add_option('-o', '--original', |
+ type='str', |
+ help='Google storage url containing original Chrome builds') |
+ |
+ # Bucket that should archive lightweight perf builds |
+ option_parser.add_option('-r', '--repackage', |
+ type='str', |
+ help='Google storage url to re-archive Chrome builds') |
+ |
+ verify_run = False |
+ (opts, args) = option_parser.parse_args() |
+ if not opts.original or not opts.repackage: |
+    raise ValueError('Need to specify the original gs bucket url and the ' |
+                     'repackage gs bucket url') |
+ context = PathContext(opts.original, opts.repackage) |
+ |
+ if opts.create: |
+ create_upload_revision_map(context) |
+ |
+ if opts.update: |
+ update_upload_revision_map(context) |
+ |
+ if opts.verify: |
+ verify_run = True |
+ |
+ revision_map = get_revision_map(context) |
+ (forward_rev, backward_rev) = get_revisions_to_package(revision_map, context) |
+ bisect_repackage_utils.MaybeMakeDirectory(STAGING_DIR) |
+ bisect_repackage_utils.MaybeMakeDirectory(STAGING_DIR_BACKWARD) |
+ |
+  # Spawn two threads: one repackaging from the oldest builds, the other |
+  # repackaging from the newest builds. |
+ forward_fetch = RepackageJob('forward_fetch', forward_rev, revision_map, True, |
+ verify_run, os.path.abspath(STAGING_DIR), |
+ context) |
+ backward_fetch = RepackageJob('backward_fetch', backward_rev, revision_map, |
+ False, verify_run, |
+ os.path.abspath(STAGING_DIR_BACKWARD), |
+ context) |
+ forward_fetch.Start() |
+ backward_fetch.Start() |
+ forward_fetch.WaitFor() |
+ backward_fetch.WaitFor() |
+ |
+if '__main__' == __name__: |
+ sys.exit(main(sys.argv)) |