tools/bisect-builds.py - Issue 7493016: Added pre-fetching of builds.

Unified Diff: tools/bisect-builds.py

Issue 7493016: Added pre-fetching of builds. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: nits Created 9 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: tools/bisect-builds.py

diff --git a/tools/bisect-builds.py b/tools/bisect-builds.py

index 68ceb3c722fab14e4682943a82ba99e47b400c72..3ccedbb242a7c5757c1ec70f8b1fe72fc18be3d6 100644

--- a/tools/bisect-builds.py

+++ b/tools/bisect-builds.py

@@ -14,7 +14,6 @@ it will ask you whether it is good or bad before continuing the search.

# The root URL for storage.

BASE_URL = 'http://commondatastorage.googleapis.com/chromium-browser-snapshots'

-BASE_URL_RECENT = 'http://build.chromium.org/f/chromium/snapshots'

# URL to the ViewVC commit page.

BUILD_VIEWVC_URL = 'http://src.chromium.org/viewvc/chrome?view=rev&revision=%d'

@@ -31,8 +30,10 @@ import os

import pipes

import re

import shutil

+import subprocess

import sys

import tempfile

+import threading

import urllib

from xml.etree import ElementTree

import zipfile

@@ -40,13 +41,12 @@ import zipfile

class PathContext(object):

"""A PathContext is used to carry the information used to construct URLs and

paths when dealing with the storage server and archives."""

- def __init__(self, platform, good_revision, bad_revision, use_recent):

+ def __init__(self, platform, good_revision, bad_revision):

super(PathContext, self).__init__()

# Store off the input parameters.

self.platform = platform # What's passed in to the '-a/--archive' option.

self.good_revision = good_revision

self.bad_revision = bad_revision

- self.use_recent = use_recent

# The name of the ZIP file in a revision directory on the server.

self.archive_name = None

@@ -74,7 +74,7 @@ class PathContext(object):

self._archive_extract_dir = 'chrome-win32'

self._binary_name = 'chrome.exe'

else:

- raise Exception("Invalid platform")

+ raise Exception('Invalid platform: %s' % self.platform)

def GetListingURL(self, marker=None):

"""Returns the URL for a directory listing, with an optional marker."""

@@ -84,19 +84,10 @@ class PathContext(object):

return BASE_URL + '/?delimiter=/&prefix=' + self._listing_platform_dir + \

marker_param

- def GetListingURLRecent(self):

- """Returns the URL for a directory listing of recent builds."""

- return BASE_URL_RECENT + '/' + self._listing_platform_dir

def GetDownloadURL(self, revision):

"""Gets the download URL for a build archive of a specific revision."""

- if self.use_recent:

- return "%s/%s%d/%s" % (

- BASE_URL_RECENT, self._listing_platform_dir, revision,

- self.archive_name)

- else:

- return "%s/%s%d/%s" % (

- BASE_URL, self._listing_platform_dir, revision, self.archive_name)

+ return "%s/%s%d/%s" % (

+ BASE_URL, self._listing_platform_dir, revision, self.archive_name)

def GetLastChangeURL(self):

"""Returns a URL to the LAST_CHANGE file."""

@@ -107,12 +98,82 @@ class PathContext(object):

that is used to run the executable."""

return os.path.join(self._archive_extract_dir, self._binary_name)

+ def ParseDirectoryIndex(self):

+ """Parses the Google Storage directory listing into a list of revision

+ numbers. The range starts with self.good_revision and goes until

+ self.bad_revision."""

+ def _FetchAndParse(url):

+ """Fetches a URL and returns a 2-Tuple of ([revisions], next-marker). If

+ next-marker is not None, then the listing is a partial listing and another

+ fetch should be performed with next-marker being the marker= GET

+ parameter."""

+ handle = urllib.urlopen(url)

+ document = ElementTree.parse(handle)

+ # All nodes in the tree are namespaced. Get the root's tag name to extract

+ # the namespace. Etree does namespaces as |{namespace}tag|.

+ root_tag = document.getroot().tag

+ end_ns_pos = root_tag.find('}')

+ if end_ns_pos == -1:

+ raise Exception("Could not locate end namespace for directory index")

+ namespace = root_tag[:end_ns_pos + 1]

+ # Find the prefix (_listing_platform_dir) and whether or not the list is

+ # truncated.

+ prefix_len = len(document.find(namespace + 'Prefix').text)

+ next_marker = None

+ is_truncated = document.find(namespace + 'IsTruncated')

+ if is_truncated is not None and is_truncated.text.lower() == 'true':

+ next_marker = document.find(namespace + 'NextMarker').text

+ # Get a list of all the revisions.

+ all_prefixes = document.findall(namespace + 'CommonPrefixes/' +

+ namespace + 'Prefix')

+ # The <Prefix> nodes have content of the form of

+ # |_listing_platform_dir/revision/|. Strip off the platform dir and the

+ # trailing slash to just have a number.

+ revisions = []

+ for prefix in all_prefixes:

+ revnum = prefix.text[prefix_len:-1]

+ try:

+ revnum = int(revnum)

+ revisions.append(revnum)

+ except ValueError:

+ pass

+ return (revisions, next_marker)

+ # Fetch the first list of revisions.

+ (revisions, next_marker) = _FetchAndParse(self.GetListingURL())

+ # If the result list was truncated, refetch with the next marker. Do this

+ # until an entire directory listing is done.

+ while next_marker:

+ next_url = self.GetListingURL(next_marker)

+ (new_revisions, next_marker) = _FetchAndParse(next_url)

+ revisions.extend(new_revisions)

+ return revisions

+ def GetRevList(self):

+ """Gets the list of revision numbers between self.good_revision and

+ self.bad_revision."""

+ # Download the revlist and filter for just the range between good and bad.

+ minrev = self.good_revision

+ maxrev = self.bad_revision

+ revlist = map(int, self.ParseDirectoryIndex())

+ revlist = [x for x in revlist if x >= minrev and x <= maxrev]

+ revlist.sort()

+ return revlist

def UnzipFilenameToDir(filename, dir):

"""Unzip |filename| to directory |dir|."""

+ cwd = os.getcwd()

+ if not os.path.isabs(filename):

+ filename = os.path.join(cwd, filename)

zf = zipfile.ZipFile(filename)

# Make base.

- pushd = os.getcwd()

try:

if not os.path.isdir(dir):

os.mkdir(dir)

@@ -132,191 +193,201 @@ def UnzipFilenameToDir(filename, dir):

out.close()

# Set permissions. Permission info in external_attr is shifted 16 bits.

os.chmod(name, info.external_attr >> 16L)

- os.chdir(pushd)

+ os.chdir(cwd)

except Exception, e:

print >>sys.stderr, e

sys.exit(1)

-def ParseDirectoryIndex(context):

- """Parses the Google Storage directory listing into a list of revision

- numbers. The range starts with context.good_revision and goes until the latest

- revision."""

- def _FetchAndParse(url):

- """Fetches a URL and returns a 2-Tuple of ([revisions], next-marker). If

- next-marker is not None, then the listing is a partial listing and another

- fetch should be performed with next-marker being the marker= GET

- parameter."""

- handle = urllib.urlopen(url)

- document = ElementTree.parse(handle)

- # All nodes in the tree are namespaced. Get the root's tag name to extract

- # the namespace. Etree does namespaces as |{namespace}tag|.

- root_tag = document.getroot().tag

- end_ns_pos = root_tag.find('}')

- if end_ns_pos == -1:

- raise Exception("Could not locate end namespace for directory index")

- namespace = root_tag[:end_ns_pos + 1]

- # Find the prefix (_listing_platform_dir) and whether or not the list is

- # truncated.

- prefix_len = len(document.find(namespace + 'Prefix').text)

- next_marker = None

- is_truncated = document.find(namespace + 'IsTruncated')

- if is_truncated is not None and is_truncated.text.lower() == 'true':

- next_marker = document.find(namespace + 'NextMarker').text

- # Get a list of all the revisions.

- all_prefixes = document.findall(namespace + 'CommonPrefixes/' +

- namespace + 'Prefix')

- # The <Prefix> nodes have content of the form of

- # |_listing_platform_dir/revision/|. Strip off the platform dir and the

- # trailing slash to just have a number.

- revisions = []

- for prefix in all_prefixes:

- revnum = prefix.text[prefix_len:-1]

- try:

- revnum = int(revnum)

- revisions.append(revnum)

- except ValueError:

- pass

- return (revisions, next_marker)

- # Fetch the first list of revisions.

- (revisions, next_marker) = _FetchAndParse(context.GetListingURL())

- # If the result list was truncated, refetch with the next marker. Do this

- # until an entire directory listing is done.

- while next_marker:

- (new_revisions, next_marker) = _FetchAndParse(

- context.GetListingURL(next_marker))

- revisions.extend(new_revisions)

- return revisions

-def ParseDirectoryIndexRecent(context):

- """Parses the recent builds directory listing into a list of revision

- numbers."""

- handle = urllib.urlopen(context.GetListingURLRecent())

- document = handle.read()

- # Looking for: <a href="92976/">92976/</a>

- return re.findall(r"<a href=\"(\d+)/\">\1/</a>", document)

-def FilterRevList(context, revlist):

- """Filter revlist to the revisions between |good_revision| and

- |bad_revision| of the |context|."""

- # Download the revlist and filter for just the range between good and bad.

- rev_range = range(context.good_revision, context.bad_revision)

- revlist = filter(lambda r: r in rev_range, revlist)

- revlist.sort()

- return revlist

-def TryRevision(context, rev, profile, args):

- """Downloads revision |rev|, unzips it, and opens it for the user to test.

- |profile| is the profile to use."""

- # Do this in a temp dir so we don't collide with user files.

- cwd = os.getcwd()

- tempdir = tempfile.mkdtemp(prefix='bisect_tmp')

- os.chdir(tempdir)

+def FetchRevision(context, rev, filename, quit_event=None):

+ """Downloads and unzips revision |rev|.

+ @param context A PathContext instance.

+ @param rev The Chromium revision number/tag to download.

+ @param filename The destination for the downloaded file.

+ @param quit_event A threading.Event which will be set by the master thread to

+ indicate that the download should be aborted.

+ """

+ def ReportHook(blocknum, blocksize, totalsize):

+ if quit_event and quit_event.is_set():

+ raise RuntimeError("Aborting download of revision %d" % rev)

- # Download the file.

download_url = context.GetDownloadURL(rev)

- def _ReportHook(blocknum, blocksize, totalsize):

- size = blocknum * blocksize

- if totalsize == -1: # Total size not known.

- progress = "Received %d bytes" % size

- else:

- size = min(totalsize, size)

- progress = "Received %d of %d bytes, %.2f%%" % (

- size, totalsize, 100.0 * size / totalsize)

- # Send a \r to let all progress messages use just one line of output.

- sys.stdout.write("\r" + progress)

- sys.stdout.flush()

try:

- print 'Fetching ' + download_url

- urllib.urlretrieve(download_url, context.archive_name, _ReportHook)

- print

- # Throw an exception if the download was less than 1000 bytes.

- if os.path.getsize(context.archive_name) < 1000: raise Exception()

- except Exception, e:

- print('Could not retrieve the download. Sorry.')

- sys.exit(-1)

+ urllib.urlretrieve(download_url, filename, ReportHook)

+ except RuntimeError, e:

+ pass

+def RunRevision(context, revision, zipfile, profile, args):

+ """Given a zipped revision, unzip it and run the test."""

+ print "Trying revision %d..." % revision

- # Unzip the file.

- print 'Unzipping ...'

- UnzipFilenameToDir(context.archive_name, os.curdir)

+ # Create a temp directory and unzip the revision into it

Robert Sesek 2011/07/29 22:36:11 .

+ cwd = os.getcwd()

+ tempdir = tempfile.mkdtemp(prefix='bisect_tmp')

+ UnzipFilenameToDir(zipfile, tempdir)

+ os.chdir(tempdir)

- # Tell the system to open the app.

- args = ['--user-data-dir=%s' % profile] + args

- flags = ' '.join(map(pipes.quote, args))

- cmd = '%s %s' % (context.GetLaunchPath(), flags)

- print 'Running %s' % cmd

- os.system(cmd)

+ # Run the build.

+ testargs = [context.GetLaunchPath(), '--user-data-dir=%s' % profile] + args

+ subproc = subprocess.Popen(testargs,

+ bufsize=-1,

+ stdout=subprocess.PIPE,

+ stderr=subprocess.PIPE)

+ (stdout, stderr) = subproc.communicate()

os.chdir(cwd)

- print 'Cleaning temp dir ...'

try:

shutil.rmtree(tempdir, True)

except Exception, e:

pass

+ return (subproc.returncode, stdout, stderr)

-def AskIsGoodBuild(rev):

+def AskIsGoodBuild(rev, status, stdout, stderr):

"""Ask the user whether build |rev| is good or bad."""

# Loop until we get a response that we can parse.

while True:

- response = raw_input('\nBuild %d is [(g)ood/(b)ad]: ' % int(rev))

+ response = raw_input('\nRevision %d is [(g)ood/(b)ad/(q)uit]: ' % int(rev))

if response and response in ('g', 'b'):

return response == 'g'

+ if response and response == 'q':

+ raise SystemExit()

-def Bisect(revlist,

- context,

+def Bisect(platform,

+ good_rev=0,

+ bad_rev=0,

try_args=(),

- profile='profile',

+ profile=None,

predicate=AskIsGoodBuild):

- """Tries to find the exact commit where a regression was introduced by

- running a binary search on all archived builds in a given revision range.

- @param revlist A list of chromium revision numbers to check.

- @param context A PathContext object.

- @param try_args A tuple of arguments to pass to the predicate function.

- @param profile The user profile with which to run chromium.

+ """Given known good and known bad revisions, run a binary search on all

+ archived revisions to determine the last known good revision.

+ @param platform Which build to download/run ('mac', 'win', 'linux64', etc.).

+ @param good_rev Number/tag of the last known good revision.

+ @param bad_rev Number/tag of the first known bad revision.

+ @param try_args A tuple of arguments to pass to the test application.

+ @param profile The name of the user profile to run with.

@param predicate A predicate function which returns True iff the argument

chromium revision is good.

+ Threading is used to fetch Chromium revisions in the background, speeding up

+ the user's experience. For example, suppose the bounds of the search are

+ good_rev=0, bad_rev=100. The first revision to be checked is 50. Depending on

+ whether revision 50 is good or bad, the next revision to check will be either

+ 25 or 75. So, while revision 50 is being checked, the script will download

+ revisions 25 and 75 in the background. Once the good/bad verdict on rev 50 is

+ known:

+ - If rev 50 is good, the download of rev 25 is cancelled, and the next test

+ is run on rev 75.

+ - If rev 50 is bad, the download of rev 75 is cancelled, and the next test

+ is run on rev 25.

"""

- good = 0

- bad = len(revlist) - 1

- last_known_good_rev = revlist[good]

- first_known_bad_rev = revlist[bad]

+ if not profile:

+ profile = 'profile'

- # Binary search time!

- while good < bad:

- candidates = revlist[good:bad]

- num_poss = len(candidates)

- if num_poss > 10:

- print('%d candidates. %d tries left.' %

- (num_poss, round(math.log(num_poss, 2))))

- else:

- print('Candidates: %s' % revlist[good:bad])

+ context = PathContext(platform, good_rev, bad_rev)

+ cwd = os.getcwd()

- # Cut the problem in half...

- test = int((bad - good) / 2) + good

- test_rev = revlist[test]

+ _GetDownloadPath = lambda rev: os.path.join(cwd,

+ '%d-%s' % (rev, context.archive_name))

- # Let the user give this rev a spin (in her own profile, if she wants).

- TryRevision(context, test_rev, profile, try_args)

- if predicate(test_rev):

- last_known_good_rev = test_rev

- good = test + 1

- else:

- bad = test

+ revlist = context.GetRevList()

- return (last_known_good_rev, first_known_bad_rev)

+ # Get a list of revisions to bisect across.

+ if len(revlist) < 2: # Don't have enough builds to bisect

Robert Sesek 2011/07/29 22:36:11 .

+ msg = 'We don\'t have enough builds to bisect. revlist: %s' % revlist

+ raise RuntimeError(msg)

+ # Figure out our bookends and first pivot point; fetch the pivot revision.

+ good = 0

+ bad = len(revlist) - 1

+ pivot = bad / 2

+ rev = revlist[pivot]

+ zipfile = _GetDownloadPath(rev)

+ print "Downloading revision %d..." % rev

+ FetchRevision(context, rev, zipfile)

+ # Binary search time!

+ while zipfile and bad - good > 1:

+ # Pre-fetch next two possible pivots

+ # - down_pivot is the next revision to check if the current revision turns

+ # out to be bad.

+ # - up_pivot is the next revision to check if the current revision turns

+ # out to be good.

+ down_pivot = int((pivot - good) / 2) + good

+ down_thread = None

+ if down_pivot != pivot and down_pivot != good:

+ down_rev = revlist[down_pivot]

+ down_zipfile = _GetDownloadPath(down_rev)

+ down_event = threading.Event()

+ fetchargs = (context, down_rev, down_zipfile, down_event)

+ down_thread = threading.Thread(target=FetchRevision,

+ name='down_fetch',

+ args=fetchargs)

+ down_thread.start()

+ up_pivot = int((bad - pivot) / 2) + pivot

+ up_thread = None

+ if up_pivot != pivot and up_pivot != bad:

+ up_rev = revlist[up_pivot]

+ up_zipfile = _GetDownloadPath(up_rev)

+ up_event = threading.Event()

+ fetchargs = (context, up_rev, up_zipfile, up_event)

+ up_thread = threading.Thread(target=FetchRevision,

+ name='up_fetch',

+ args=fetchargs)

+ up_thread.start()

+ # Run test on the pivot revision.

+ (status, stdout, stderr) = RunRevision(context,

+ rev,

+ zipfile,

+ profile,

+ try_args)

+ os.unlink(zipfile)

+ zipfile = None

+ # Call the predicate function to see if the current revision is good or bad.

+ # On that basis, kill one of the background downloads and complete the

+ # other, as described in the comments above.

+ try:

+ if predicate(rev, status, stdout, stderr):

+ good = pivot

+ if down_thread:

+ down_event.set() # Kill the download of older revision

Robert Sesek 2011/07/29 22:36:11 here

+ down_thread.join()

+ os.unlink(down_zipfile)

+ if up_thread:

+ print "Downloading revision %d..." % up_rev

+ up_thread.join() # Wait for newer revision to finish downloading

Robert Sesek 2011/07/29 22:36:11 here

+ pivot = up_pivot

+ zipfile = up_zipfile

+ else:

+ bad = pivot

+ if up_thread:

+ up_event.set() # Kill download of newer revision

Robert Sesek 2011/07/29 22:36:11 .

+ up_thread.join()

+ os.unlink(up_zipfile)

+ if down_thread:

+ print "Downloading revision %d..." % down_rev

+ down_thread.join() # Wait for older revision to finish downloading

Robert Sesek 2011/07/29 22:36:11 .

+ pivot = down_pivot

+ zipfile = down_zipfile

+ except SystemExit:

+ for f in [down_zipfile, up_zipfile]:

+ try:

+ os.unlink(f)

+ except OSError:

+ pass

+ sys.exit(0)

+ rev = revlist[pivot]

+ return (revlist[good], revlist[bad])

def main():

@@ -354,7 +425,7 @@ def main():

return 1

# Create the context. Initialize 0 for the revisions as they are set below.

- context = PathContext(opts.archive, 0, 0, use_recent=False)

+ context = PathContext(opts.archive, 0, 0)

# Pick a starting point, try to get HEAD for this.

if opts.bad:

@@ -384,45 +455,8 @@ def main():

except Exception, e:

pass

- # Set the input parameters now that they've been validated.

- context.good_revision = good_rev

- context.bad_revision = bad_rev

- # Get recent revision list and check whether it's sufficient.

- all_revs_recent = map(int, ParseDirectoryIndexRecent(context))

- all_revs_recent.sort()

- # Skipping 0 since it might be deleted off the server soon:

- all_revs_recent = all_revs_recent[1:]

- oldest_recent_rev = all_revs_recent[0]

- if good_rev >= oldest_recent_rev:

- # The range is within recent builds, so switch on use_recent.

- context.use_recent = True

- elif bad_rev >= oldest_recent_rev:

- # The range spans both old and recent builds.

- # If oldest_recent_rev is good, we bisect the recent builds.

- context.use_recent = True

- TryRevision(context, oldest_recent_rev, opts.profile, args)

- if AskIsGoodBuild(oldest_recent_rev):

- # context.use_recent is True

- context.good_revision = oldest_recent_rev

- else:

- context.use_recent = False

- context.bad_revision = oldest_recent_rev

- all_revs = []

- if context.use_recent:

- all_revs = all_revs_recent

- else:

- all_revs = map(int, ParseDirectoryIndex(context))

- # Filter list of revisions to bisect across.

- revlist = FilterRevList(context, all_revs)

- if len(revlist) < 2: # Don't have enough builds to bisect

- print 'We don\'t have enough builds to bisect. revlist: %s' % revlist

- sys.exit(1)

(last_known_good_rev, first_known_bad_rev) = Bisect(

- revlist, context, args, opts.profile)

+ opts.archive, good_rev, bad_rev, args, opts.profile)

# We're done. Let the user know the results in an official manner.

print('You are probably looking for build %d.' % first_known_bad_rev)

« no previous file with comments | « no previous file | no next file » | no next file with comments »