| Index: tools/compare_codereview.py
|
| diff --git a/tools/compare_codereview.py b/tools/compare_codereview.py
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..16f3a0198d9e12647072ab3b3e96c8e8d9e566a9
|
| --- /dev/null
|
| +++ b/tools/compare_codereview.py
|
| @@ -0,0 +1,387 @@
|
| +#!/usr/bin/python2
|
| +
|
| +# Copyright 2014 Google Inc.
|
| +#
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +"""Skia's Chromium Codereview Comparison Script.
|
| +
|
| +This script takes two Codereview URLs, looks at the trybot results for
|
| +the two codereviews and compares the results.
|
| +
|
| +Usage:
|
| + compare_codereview.py CONTROL_URL ROLL_URL
|
| +"""
|
| +
|
| +import collections
|
| +import os
|
| +import re
|
| +import sys
|
| +import urllib2
|
| +import HTMLParser
|
| +
|
| +
|
class CodeReviewHTMLParser(HTMLParser.HTMLParser):
    """Parses a CodeReview web page and collects trybot statuses.

    Use the CodeReviewHTMLParser.parse static function to make use of
    this class.

    This uses the HTMLParser class because it's the best thing in
    Python's standard library.  We need a little more power than a
    regex.  [Search for "You can't parse [X]HTML with regex." for more
    information.]

    Parsing state:
        _id: id of the most recent <div id="tryjobdiv*"> seen, or
            None.  It is never cleared, so once one such div has been
            seen every later build-result anchor is accepted.
        _status, _href: attributes of the anchor being captured.
        _anchor_data: text accumulated inside the anchor.
        _currently_parsing_trybotdiv: True while inside a
            build-result anchor.
    """
    # pylint: disable=I0011,R0904

    # namedtuples are like lightweight structs in Python.  The low
    # overhead of a tuple, but the ease of use of an object.
    Status = collections.namedtuple('Status', ['status', 'url'])

    @staticmethod
    def parse(url):
        """Parses a CodeReview web page.

        Args:
            url (string), a codereview URL like this:
                'https://codereview.chromium.org/?????????'.

        Returns:
            A dictionary; the keys are bot_name strings, the values
            are CodeReviewHTMLParser.Status objects.  Returns None
            (after writing a message to stderr) if the page could not
            be fetched.
        """
        parser = CodeReviewHTMLParser()
        try:
            response = urllib2.urlopen(url)
            try:
                parser.feed(response.read())
            finally:
                # Always release the connection, even if feed() fails.
                response.close()
        except urllib2.URLError:
            sys.stderr.write('Error getting %s\n' % url)
            return None
        parser.close()
        return parser.statuses

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self._id = None
        self._status = None
        self._href = None
        self._anchor_data = ''
        self._currently_parsing_trybotdiv = False
        # statuses is a dictionary of CodeReviewHTMLParser.Status
        self.statuses = {}

    def _reset_anchor_state(self):
        """Forgets the anchor being captured; search for the next bot."""
        self._currently_parsing_trybotdiv = False
        self._anchor_data = ''
        self._status = None
        self._href = None

    def handle_starttag(self, tag, attrs):
        """Overrides the HTMLParser method to implement functionality.

        Remembers <div id="tryjobdiv*"> and, once one has been seen,
        starts capturing every <a class="build-result" ...> anchor.

        Args:
            tag: tag name, already lower-cased by HTMLParser.
            attrs: list of (name, value) pairs.  The value is None
                for attributes written without a value (<div id>).
        """
        attrs = dict(attrs)
        if tag == 'div':
            # We are looking for <div id="tryjobdiv*">.  "or ''"
            # guards against a valueless id attribute (value None).
            id_attr = attrs.get('id') or ''
            if id_attr.startswith('tryjobdiv'):
                self._id = id_attr
            return
        if tag != 'a' or not self._id:
            return
        if 'build-result' in (attrs.get('class') or '').split():
            # If we are already inside a <div id="tryjobdiv*">, we
            # look for a link of the form
            # <a class="build-result" href="*">.  Then we save the
            # (non-standard) status attribute and the URL.
            self._status = attrs.get('status')
            self._href = attrs.get('href')
            # Start saving anchor data.
            self._currently_parsing_trybotdiv = True

    def handle_data(self, data):
        """Overrides the HTMLParser method to implement functionality.

        Saves the text inside the <a></a> tags.  Assumes <a> tags
        aren't nested.
        """
        if self._currently_parsing_trybotdiv:
            self._anchor_data += data

    def handle_endtag(self, tag):
        """Overrides the HTMLParser method to implement functionality.

        On </a>, stores the captured anchor as a bot status and
        resets the capture state.  The reset happens even when the
        anchor had no status attribute; otherwise the text that
        follows would leak into the next bot's name.
        """
        if tag != 'a' or not self._currently_parsing_trybotdiv:
            return
        # We take the accumulated self._anchor_data and use it as
        # the bot name.
        bot = self._anchor_data.strip()
        if bot and self._status:
            # Add to accumulating dictionary.
            self.statuses[bot] = CodeReviewHTMLParser.Status(
                status=self._status, url=self._href)
        self._reset_anchor_state()
|
| +
|
| +
|
class BuilderHTMLParser(HTMLParser.HTMLParser):
    """Parses Trybot web pages and collects failure descriptions.

    Use the BuilderHTMLParser.parse static function to make use of
    this class.

    This uses the HTMLParser class because it's the best thing in
    Python's standard library.  We need a little more power than a
    regex.  [Search for "You can't parse [X]HTML with regex." for more
    information.]

    Parsing state:
        _li_level: current nesting depth of <li> elements.
        _current_failure: True once a <div class="failure result">
            has been seen inside the current top-level <li>.
        _li_data: text accumulated since that div was seen.
        _failure_results_url: last href ending in '/logs/stdio' seen
            since that div, or ''.
    """
    # pylint: disable=I0011,R0904

    Result = collections.namedtuple('Result', ['text', 'url'])

    @staticmethod
    def parse(url):
        """Parses a Trybot web page.

        Args:
            url (string), a trybot result URL.

        Returns:
            An array of BuilderHTMLParser.Results, each a description
            of failure results, along with an optional url.  Returns
            an empty array (after writing a message to stderr) if the
            page could not be fetched.
        """
        parser = BuilderHTMLParser()
        try:
            response = urllib2.urlopen(url)
            try:
                parser.feed(response.read())
            finally:
                # Always release the connection, even if feed() fails.
                response.close()
        except urllib2.URLError:
            sys.stderr.write('Error getting %s\n' % url)
            return []
        parser.close()
        return parser.failure_results

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.failure_results = []
        self._li_level = 0
        self._li_data = ''
        self._current_failure = False
        self._failure_results_url = ''

    @staticmethod
    def _clean_failure_text(text):
        """Returns the collected <li> text with the known noise removed."""
        result = text.strip()
        tokens = result.split()
        if tokens:
            # Sometimes, it repeats the same thing multiple times;
            # collapse a leading run of the same token into one.  The
            # token is escaped because it is page text, not a pattern,
            # and (?!\S) keeps 'foo foobar' from matching.  A callable
            # replacement avoids interpreting backslashes in the token.
            first = tokens[0]
            escaped = re.escape(first)
            result = re.sub(r'^%s(?:\s+%s)+(?!\S)' % (escaped, escaped),
                            lambda _match: first, result)
        result = re.sub(r'unexpected flaky.*', '', result)
        # Remove some extra unnecessary text.
        result = re.sub(r'\bpreamble\b', '', result)
        result = re.sub(r'\bstdio\b', '', result)
        return result

    def handle_starttag(self, tag, attrs):
        """Overrides the HTMLParser method to implement functionality.

        Tracks <li> nesting, starts collecting text at
        <div class="failure result">, and remembers stdio log links.

        Args:
            tag: tag name, already lower-cased by HTMLParser.
            attrs: list of (name, value) pairs.  The value is None
                for attributes written without a value.
        """
        attrs = dict(attrs)
        if tag == 'li':
            # <li> tags can be nested.  So we have to count the
            # nest-level for backing out.
            self._li_level += 1
            return
        if tag == 'div' and attrs.get('class') == 'failure result':
            # We care about this sort of thing:
            #   <li>
            #     <li>
            #       <li>
            #         <div class="failure result">...</div>
            #       </li>
            #     </li>
            #     We want this text here.
            #   </li>
            if self._li_level > 0:
                self._current_failure = True  # Tells us to keep text.
            return

        if tag == 'a' and self._current_failure:
            # Anchors without an href (e.g. <a name="...">) are legal;
            # treat them as having an empty target instead of crashing.
            href = attrs.get('href') or ''
            # Sometimes we want to keep the stdio url.  We always
            # return it, just in case.
            if href.endswith('/logs/stdio'):
                self._failure_results_url = href

    def handle_data(self, data):
        """Overrides the HTMLParser method to implement functionality.

        Accumulates text while inside a failing <li>.
        """
        if self._current_failure:
            self._li_data += data

    def handle_endtag(self, tag):
        """Overrides the HTMLParser method to implement functionality.

        When the outermost <li> closes, records the collected failure
        (if any) and resets the collection state.
        """
        if tag != 'li':
            return
        # Clamp at zero so that a stray </li> cannot leave the counter
        # negative and hide every later top-level item.
        if self._li_level > 0:
            self._li_level -= 1
        if self._li_level:
            return
        if self._current_failure:
            self.failure_results.append(BuilderHTMLParser.Result(
                self._clean_failure_text(self._li_data),
                self._failure_results_url))
        # Reset the state.
        self._current_failure = False
        self._li_data = ''
        self._failure_results_url = ''
|
| +
|
| +
|
def printer(indent, string):
    """Write indented, word-wrapped text to sys.stdout.

    Each newline-separated line of string is wrapped independently to
    68 - (2 * indent) columns.  Every output line is prefixed with
    indent copies of the spacer; continuation lines of a wrapped line
    get one extra spacer.  Lines containing no words produce no
    output.  stdout is flushed once at the end.

    Args:
        indent: (int) indentation level.
        string: (string) text to print; may contain newlines.
    """
    def wrap_to(line, columns):
        """Greedily pack the words of line into strings of at most
        columns characters (a single longer word stays whole); return
        them as a list.
        """
        wrapped = []
        current = ''
        for word in line.split():
            if not current:
                current = word
            elif len(current) + 1 + len(word) > columns:
                wrapped.append(current)
                current = word
            else:
                current = '%s %s' % (current, word)
        if current:
            wrapped.append(current)
        return wrapped

    # NOTE(review): the width budget reserves 2 columns per indent
    # level while the spacer here is a single space -- confirm which
    # one is intended.
    spacer = ' '
    width = 68 - (2 * indent)
    stream = sys.stdout
    for paragraph in string.split('\n'):
        for index, piece in enumerate(wrap_to(paragraph, width)):
            depth = indent + 1 if index > 0 else indent
            stream.write(spacer * depth + piece + '\n')
    stream.flush()
|
| +
|
| +
|
def main(control_url, roll_url, verbosity=1):
    """Compare the trybot results of two Codereview URLs.

    Fetches both codereview pages, prints a summary table of the bots
    present on both (verbosity > 0), then prints details for every
    bot whose roll status is not 'success'.  For 'failure' statuses
    the bot's result page is fetched and its failures are listed.

    Args:
        control_url, roll_url: (strings) URL of the format
            https://codereview.chromium.org/?????????

        verbosity: (int) verbose level.  0, 1, or 2.
    """
    # pylint: disable=I0011,R0914,R0912
    control = CodeReviewHTMLParser.parse(control_url)
    roll = CodeReviewHTMLParser.parse(roll_url)
    if not (control and roll):
        # parse() already reported fetch errors (None); also say so
        # when a page was fetched but held no trybot results.
        for url, statuses in ((control_url, control), (roll_url, roll)):
            if statuses is not None and not statuses:
                sys.stderr.write('No trybot results found at %s\n' % url)
        return

    # rstrip('/') so that a URL with a trailing slash still yields the
    # issue number rather than an empty label.
    control_name = '[control %s]' % control_url.rstrip('/').split('/')[-1]
    roll_name = '[roll %s]' % roll_url.rstrip('/').split('/')[-1]
    all_bots = sorted(set(control) & set(roll))  # Set intersection.

    out = sys.stdout
    if verbosity > 0:
        # Print out summary of all of the bots.
        out.write('%11s %11s %4s %s\n\n' %
                  ('CONTROL', 'ROLL', 'DIFF', 'BOT'))
        for bot in all_bots:
            control_status = control[bot].status
            roll_status = roll[bot].status
            if control_status != roll_status:
                diff = '****'
            elif control_status != 'success':
                # Same status on both sides, but not a success.
                diff = '....'
            else:
                diff = ''
            out.write('%11s %11s %4s %s\n' % (
                control_status, roll_status, diff, bot))
        out.write('\n')
        out.flush()

    for bot in all_bots:
        if roll[bot].status == 'success':
            if verbosity > 1:
                printer(0, '==%s==' % bot)
                printer(1, 'OK')
            continue
        printer(0, '==%s==' % bot)

        for (status, name, url) in (
                (control[bot].status, control_name, control[bot].url),
                (roll[bot].status, roll_name, roll[bot].url)):
            printer(1, name)
            if status != 'failure' or not url:
                # Nothing to drill into: either not a failure, or the
                # anchor carried no result URL to fetch.
                printer(2, status)
                continue
            for result in BuilderHTMLParser.parse(url):
                # Put each *.html name on its own line.  '\\g<1>' is
                # the group reference; the backslash is escaped so the
                # literal holds no invalid escape sequence.
                formatted_result = re.sub(
                    r'(\S*\.html) ', '\n__\\g<1>\n', result.text)
                printer(2, formatted_result)
                if ('compile' in result.text
                        or '...and more' in result.text):
                    # result.url is appended to the directory part of
                    # the bot's result URL.
                    printer(3, re.sub('/[^/]*$', '/', url) + result.url)
        out.write('\n')
|
| +
|
| +
|
if __name__ == '__main__':
    # Expect exactly the two codereview URLs; the verbosity level is
    # taken from the COMPARE_CODEREVIEW_VERBOSITY environment variable
    # (default 1).
    if len(sys.argv) < 3:
        sys.stderr.write(__doc__ + '\n')
        # sys.exit, not the interactive-only exit() helper that the
        # site module installs (absent under "python -S").
        sys.exit(1)
    main(sys.argv[1], sys.argv[2],
         int(os.environ.get('COMPARE_CODEREVIEW_VERBOSITY', 1)))
|
| +
|
|
|