Index: tools/compare_codereview.py |
diff --git a/tools/compare_codereview.py b/tools/compare_codereview.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..16f3a0198d9e12647072ab3b3e96c8e8d9e566a9 |
--- /dev/null |
+++ b/tools/compare_codereview.py |
@@ -0,0 +1,387 @@ |
+#!/usr/bin/python2 |
+ |
+# Copyright 2014 Google Inc. |
+# |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+"""Skia's Chromium Codereview Comparison Script. |
+ |
+This script takes two Codereview URLs, looks at the trybot results for |
+the two codereviews and compares the results. |
+ |
+Usage: |
+ compare_codereview.py CONTROL_URL ROLL_URL |
+""" |
+ |
+import collections |
+import os |
+import re |
+import sys |
+import urllib2 |
+import HTMLParser |
+ |
+ |
+class CodeReviewHTMLParser(HTMLParser.HTMLParser): |
+    """Parses a CodeReview web page. |
+ |
+    Use the CodeReviewHTMLParser.parse static function to make use of |
+    this class. |
+ |
+    This uses the HTMLParser class because it's the best thing in |
+    Python's standard library. We need a little more power than a |
+    regex. (Search for "You can't parse [X]HTML with regex." for |
+    more information.) |
+    """ |
+    # pylint: disable=I0011,R0904 |
+    @staticmethod |
+    def parse(url): |
+        """Parses a CodeReview web page. |
+ |
+        Args: |
+            url (string), a codereview URL like this: |
+                'https://codereview.chromium.org/?????????'. |
+ |
+        Returns: |
+            A dict of bot_name string to CodeReviewHTMLParser.Status, |
+            or None (after printing to stderr) on urllib2.URLError. |
+        """ |
+        parser = CodeReviewHTMLParser() |
+        try: |
+            parser.feed(urllib2.urlopen(url).read()) |
+        except (urllib2.URLError,): |
+            print >> sys.stderr, 'Error getting', url |
+            return None |
+        parser.close() |
+        return parser.statuses |
+ |
+    # namedtuples are like lightweight structs in Python. The low |
+    # overhead of a tuple, but the ease of use of an object. |
+    Status = collections.namedtuple('Status', ['status', 'url']) |
+ |
+    def __init__(self): |
+        HTMLParser.HTMLParser.__init__(self) |
+        self._id = None |
+        self._status = None |
+        self._href = None |
+        self._anchor_data = '' |
+        self._currently_parsing_trybotdiv = False |
+        # statuses is a dictionary of CodeReviewHTMLParser.Status |
+        self.statuses = {} |
+ |
+    def handle_starttag(self, tag, attrs): |
+        """Overrides the HTMLParser method to implement functionality. |
+ |
+        Watches for two kinds of start tag: |
+ |
+          * <div id="tryjobdiv*">: remembers the id so that later |
+            anchors are known to follow a try-job div. |
+          * <a class="build-result" status="*" href="*"> seen after |
+            such a div: saves the (non-standard) status attribute and |
+            the URL, and starts collecting the anchor text, which |
+            handle_endtag uses as the bot name. |
+ |
+        Args: |
+            tag: (string) the tag name, converted to lower case. |
+            attrs: list of (name, value) pairs for the attributes in |
+                the tag; names are lower case. A value is None |
+                when the attribute is given without a value. |
+        """ |
+        attrs = dict(attrs) |
+        if tag == 'div': |
+            # We are looking for <div id="tryjobdiv*">. The value is |
+            # None for a bare `id` attribute, hence the `or ''`. |
+            id_attr = attrs.get('id') or '' |
+            if id_attr.startswith('tryjobdiv'): |
+                self._id = id_attr |
+        if (self._id and tag == 'a' |
+                and 'build-result' in (attrs.get('class') or '').split()): |
+            # If we have already seen a <div id="tryjobdiv*">, we |
+            # look for a link of the form |
+            # <a class="build-result" href="*">. Then we save the |
+            # (non-standard) status attribute and the URL. |
+            self._status = attrs.get('status') |
+            self._href = attrs.get('href') |
+            self._currently_parsing_trybotdiv = True |
+            # Start saving anchor data. |
+ |
+    def handle_data(self, data): |
+        """Overrides the HTMLParser method to implement functionality. |
+ |
+        Accumulates text found while inside a build-result anchor; |
+        handle_endtag uses the accumulated text as the bot name. |
+ |
+        Args: |
+            data: (string) a chunk of text between tags. |
+        """ |
+        # Save the text inside the <a></a> tags. Assume <a> tags |
+        # aren't nested. |
+        if self._currently_parsing_trybotdiv: |
+            self._anchor_data += data |
+ |
+    def handle_endtag(self, tag): |
+        """Overrides the HTMLParser method to implement functionality. |
+ |
+        On the </a> that closes a build-result anchor, records the |
+        bot (if it has both a name and a status) and resets state. |
+ |
+        Args: |
+            tag: (string) the tag name, converted to lower case. |
+        """ |
+        if tag == 'a' and self._currently_parsing_trybotdiv: |
+            # We take the accumulated self._anchor_data and save it as |
+            # the bot name. |
+            bot = self._anchor_data.strip() |
+            if bot and self._status: |
+                # Add to accumulating dictionary. |
+                self.statuses[bot] = CodeReviewHTMLParser.Status( |
+                    status=self._status, url=self._href) |
+            # Reset state (even when there was no status) so that stray |
+            # anchor text never leaks into the next bot's name. |
+            self._currently_parsing_trybotdiv = False |
+            self._anchor_data = '' |
+            self._status = None |
+            self._href = None |
+ |
+ |
+class BuilderHTMLParser(HTMLParser.HTMLParser): |
+    """Parses Trybot web pages. |
+ |
+    Use the BuilderHTMLParser.parse static function to make use of |
+    this class. |
+ |
+    This uses the HTMLParser class because it's the best thing in |
+    Python's standard library. We need a little more power than a |
+    regex. (Search for "You can't parse [X]HTML with regex." for |
+    more information.) |
+    """ |
+    # pylint: disable=I0011,R0904 |
+    @staticmethod |
+    def parse(url): |
+        """Parses a Trybot web page. |
+ |
+        Args: |
+            url (string), a trybot result URL. |
+ |
+        Returns: |
+            A list of BuilderHTMLParser.Result (failure text plus an |
+            optional stdio url); empty if urllib2 raises URLError. |
+        """ |
+        parser = BuilderHTMLParser() |
+        try: |
+            parser.feed(urllib2.urlopen(url).read()) |
+        except (urllib2.URLError,): |
+            print >> sys.stderr, 'Error getting', url |
+            return [] |
+        parser.close() |
+        return parser.failure_results |
+ |
+    Result = collections.namedtuple('Result', ['text', 'url']) |
+ |
+    def __init__(self): |
+        HTMLParser.HTMLParser.__init__(self) |
+        # failure_results is a list of BuilderHTMLParser.Result |
+        self.failure_results = [] |
+        self._li_level = 0  # Current <li> nesting depth. |
+        self._li_data = ''  # Text collected for the current failure. |
+        self._current_failure = False  # True while collecting text. |
+        # Last */logs/stdio href seen for the current failure. |
+        self._failure_results_url = '' |
+ |
+    def handle_starttag(self, tag, attrs): |
+        """Overrides the HTMLParser method to implement functionality. |
+ |
+        Tracks three kinds of start tag: |
+ |
+          * <li>: increments the nesting level, so that |
+            handle_endtag can tell when the outermost <li> closes. |
+          * <div class="failure result"> inside an <li>: starts |
+            collecting text for a failure description. |
+          * <a href="*/logs/stdio"> while collecting: remembers the |
+            href as the URL to report with the failure. |
+ |
+        Args: |
+            tag: (string) the tag name, converted to lower case. |
+            attrs: list of (name, value) pairs for the attributes in |
+                the tag; names are lower case. A value is None |
+                when the attribute is given without a value. |
+        """ |
+        attrs = dict(attrs) |
+        if tag == 'li': |
+            # <li> tags can be nested. So we have to count the |
+            # nest-level for backing out. |
+            self._li_level += 1 |
+            return |
+        if tag == 'div' and attrs.get('class') == 'failure result': |
+            # We care about this sort of thing: |
+            # <li> |
+            #   <li> |
+            #     <li> |
+            #       <div class="failure result">...</div> |
+            #     </li> |
+            #   </li> |
+            #   We want this text here. |
+            # </li> |
+            if self._li_level > 0: |
+                self._current_failure = True  # Tells us to keep text. |
+            return |
+ |
+        if tag == 'a' and self._current_failure: |
+            # An anchor may lack an href (or give it no value). |
+            href = attrs.get('href') or '' |
+            # Sometimes we want to keep the stdio url. We always |
+            # return it, just in case. |
+            if href.endswith('/logs/stdio'): |
+                self._failure_results_url = href |
+ |
+    def handle_data(self, data): |
+        """Overrides the HTMLParser method to implement functionality. |
+ |
+        Accumulates text while a failure description is being |
+        collected; handle_endtag turns it into a Result. |
+ |
+        Args: |
+            data: (string) a chunk of text between tags. |
+        """ |
+        if self._current_failure: |
+            self._li_data += data |
+ |
+    def handle_endtag(self, tag): |
+        """Overrides the HTMLParser method to implement functionality. |
+ |
+        When the outermost <li> closes, cleans up any collected |
+        failure text, appends it to self.failure_results and resets. |
+ |
+        Args: |
+            tag: (string) the tag name, converted to lower case. |
+        """ |
+        if tag == 'li': |
+            self._li_level = max(0, self._li_level - 1)  # Clamp stray </li>. |
+            if 0 == self._li_level: |
+                if self._current_failure: |
+                    result = self._li_data.strip() |
+                    words = result.split() |
+                    if words: |
+                        # Sometimes, it repeats the same thing |
+                        # multiple times; collapse whole-word repeats. |
+                        first, token = words[0], re.escape(words[0]) |
+                        result = re.sub( |
+                            r'^%s(\s+%s)+(?!\S)' % (token, token), |
+                            lambda _: first, result) |
+                    # Remove some extra unnecessary text. |
+                    result = re.sub(r'unexpected flaky.*', '', result) |
+                    result = re.sub(r'\bpreamble\b', '', result) |
+                    result = re.sub(r'\bstdio\b', '', result) |
+                    self.failure_results.append(BuilderHTMLParser.Result( |
+                        result, self._failure_results_url)) |
+                # Reset the state. |
+                self._current_failure = False |
+                self._li_data = '' |
+                self._failure_results_url = '' |
+ |
+ |
+def printer(indent, string): |
+    """Write `string` to stdout, indented and word-wrapped. |
+ |
+    Every newline-separated line of `string` is wrapped separately |
+    to 68 - 2 * indent columns. Output lines are prefixed with |
+    `indent` copies of the spacer; continuation lines get one more. |
+    Lines without any words produce no output. |
+    """ |
+    def wrap_to(line, columns): |
+        """Greedily pack the words of `line` into strings of at most |
+        `columns` characters (a longer word gets a string to itself) |
+        and return the list of strings. |
+        """ |
+        chunks = [] |
+        for word in line.split(): |
+            if chunks and len(chunks[-1]) + 1 + len(word) <= columns: |
+                chunks[-1] += ' ' + word |
+            else: |
+                chunks.append(word) |
+        return chunks |
+    # NOTE(review): the wrap width assumes two columns per indent |
+    # level; confirm that against the width of the spacer below. |
+    width = 68 - (2 * indent) |
+    spacer = ' ' |
+    lead = spacer * indent |
+    stream = sys.stdout |
+    for paragraph in string.split('\n'): |
+        for index, text in enumerate(wrap_to(paragraph, width)): |
+            prefix = lead + spacer if index > 0 else lead |
+            stream.write(prefix + text + '\n') |
+    stream.flush() |
+ |
+ |
+def main(control_url, roll_url, verbosity=1): |
+    """Compare the trybot results of two Codereview URLs. |
+ |
+    Args: |
+        control_url, roll_url: (strings) URL of the format |
+            https://codereview.chromium.org/????????? |
+        verbosity: (int) 0 prints details of non-successful bots; 1 |
+            also prints a summary table; 2 also lists successful bots. |
+    """ |
+    # pylint: disable=I0011,R0914,R0912 |
+    control = CodeReviewHTMLParser.parse(control_url) |
+    roll = CodeReviewHTMLParser.parse(roll_url) |
+    if not (control and roll): |
+        # A fetch failed (already reported) or a page had no try jobs. |
+        return |
+ |
+    # Tolerate a trailing slash when extracting the issue number. |
+    control_name = '[control %s]' % control_url.rstrip('/').split('/')[-1] |
+    roll_name = '[roll %s]' % roll_url.rstrip('/').split('/')[-1] |
+    all_bots = set(control) & set(roll)  # Set intersection. |
+ |
+    out = sys.stdout |
+    if verbosity > 0: |
+        # Print out summary of all of the bots. |
+        out.write('%11s %11s %4s %s\n\n' % |
+                  ('CONTROL', 'ROLL', 'DIFF', 'BOT')) |
+        for bot in sorted(all_bots): |
+            if control[bot].status != roll[bot].status: |
+                diff = '****' |
+            elif roll[bot].status != 'success': |
+                diff = '....' |
+            else: |
+                diff = '' |
+            out.write('%11s %11s %4s %s\n' % ( |
+                control[bot].status, roll[bot].status, diff, bot)) |
+        out.write('\n') |
+        out.flush() |
+ |
+    for bot in sorted(all_bots): |
+        if roll[bot].status == 'success': |
+            if verbosity > 1: |
+                printer(0, '==%s==' % bot) |
+                printer(1, 'OK') |
+            continue |
+        printer(0, '==%s==' % bot) |
+ |
+        for (status, name, url) in ( |
+                (control[bot].status, control_name, control[bot].url), |
+                (roll[bot].status, roll_name, roll[bot].url)): |
+            printer(1, name) |
+            if status != 'failure' or not url: |
+                # Nothing to fetch: just report the status. |
+                printer(2, status) |
+                continue |
+            # Fetch the builder page and list each failure it reports. |
+            for result in BuilderHTMLParser.parse(url): |
+                formatted_result = re.sub( |
+                    r'(\S*\.html) ', r'\n__\g<1>\n', result.text) |
+                printer(2, formatted_result) |
+                if result.url and ('compile' in result.text |
+                                   or '...and more' in result.text): |
+                    printer(3, re.sub('/[^/]*$', '/', url) + result.url) |
+        out.write('\n') |
+ |
+ |
+if __name__ == '__main__': |
+    if len(sys.argv) < 3: |
+        print >> sys.stderr, __doc__ |
+        sys.exit(1)  # exit() is a site helper for interactive use. |
+    main(sys.argv[1], sys.argv[2], |
+         int(os.environ.get('COMPARE_CODEREVIEW_VERBOSITY', 1))) |
+ |