buildlogparse.py - Issue 13892003: Added buildbot appengine frontend for chromium-build app

Unified Diff: buildlogparse.py

Issue 13892003: Added buildbot appengine frontend for chromium-build app (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/chromium-build

Patch Set: Created 7 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: buildlogparse.py

diff --git a/buildlogparse.py b/buildlogparse.py

new file mode 100644

index 0000000000000000000000000000000000000000..a8d93873f49a21fb0a106df06b8c5cf76bd32adb

--- /dev/null

+++ b/buildlogparse.py

@@ -0,0 +1,476 @@

+import webapp2

+from google.appengine.ext import db

+from datetime import timedelta

+import cStringIO

+import time

+import jinja2

+import datetime

+import re

+import logging

+import urllib

+from google.appengine.api import urlfetch

+import base64

+import urlparse

+import os

+import json

+import Queue

+import os

+import zlib

+from google.appengine.api import users

+from google.appengine.api import memcache

+from google.appengine.ext import deferred

+from google.appengine.api import files

+from google.appengine.api import mail

agable 2013/04/15 19:33:30 Please cleanup imports to be only what you actuall

Ryan Tseng 2013/04/17 22:53:48 Done.

# Version identifier of the running deployment, taken from the environment.
VERSION_ID = os.environ['CURRENT_VERSION_ID']

# Templates are loaded from the "templates" directory next to this module.
_TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), 'templates')
jinja_environment = jinja2.Environment(
    loader=jinja2.FileSystemLoader(_TEMPLATE_DIR),
    autoescape=True,
    extensions=['jinja2.ext.autoescape'],
)

# Use HTTP_HOST when it is set and non-empty, otherwise fall back to
# SERVER_NAME.
APP_URL = os.environ.get('HTTP_HOST') or os.environ['SERVER_NAME']

+REPLACEMENTS = [  # (pattern, replacement) pairs; emit() applies each one to every log line with re.sub.

+ # Find ../../scripts/.../*.py scripts and add links to them.

+ (r'\.\./\.\./\.\./scripts/(.*)\.py',  # NOTE(review): matches three "../" levels, while the comment and link text show two - confirm.

+ r'<a href="https://code.google.com/p/chromium/codesearch#chromium/tools/'

+ r'build/scripts/\1.py">../../scripts/\1.py</a>'),

+ # Find ../../chrome/.../*.cc files and add links to them.

+ (r'\.\./\.\./chrome/(.*)\.cc:(\d+)',

+ r'<a href="https://code.google.com/p/chromium/codesearch#chromium/src/'

+ r'chrome/\1.cc&l=\2">../../chrome/\1.cc:\2</a>'),

+ # Searches for codereview issue numbers, and add codereview links.

+ (r'apply_issue(.*)-i (\d{8})(.*)-s (.*)',

+ r'apply_issue\1-i <a href="\4/\2">\2</a>\3-s \4'),

+ # Add green labels to PASSED items.

+ (r'\[(  PASSED  )\]',

+ r'[\1]'),  # NOTE(review): as written this reproduces the match unchanged; the label markup the comment describes is not present here.

+ # Add red labels to FAILED items.

+ (r'\[(  FAILED  )\]',

+ r'[\1]'),

+ # Add black labels to RUN items.

+ (r'\[( RUN      )\]',

+ r'[\1]'),

+ # Add badges to running tests.

+ (r'\[(( )*\d+/\d+)\](( )+)(\d+\.\d+s) '

+ r'([\w/]+\.[\w/]+) $([\d.s]+)$',  # NOTE(review): '$' followed by more pattern can never match; presumably these were escaped parentheses - confirm.

+ r'\1\3'

+ r'\5 \6 \7'),

+ # Add gray labels to [==========] blocks.

+ (r'\[([-=]{10})\]',

+ r'[\1]'),

+ # Find .cc and .h files and add codesite links to them.

+ (r'\.\./\.\./([\w/-]+)\.(cc|h): ',

+ r'<a href="https://code.google.com/p/chromium/codesearch#chromium/src/'

+ r'\1.\2">../../\1.\2</a>: '),

+ # Find source files with line numbers and add links to them.

+ (r'\.\./\.\./([\w/-]+)\.(cc|h):(\d+): ',

+ r'<a href="https://code.google.com/p/chromium/codesearch#chromium/src/'

+ r'\1.\2&l=\3">../../\1.\2:\3</a>: '),

+ # Add badges to compiling items. NOTE(review): the pattern line of this entry is missing here; only its replacement strings remain.

+ r'\1 '

+ r'\2'),

+ # Bold the LHS of A=B text.

+ (r'^(( )*)(\w+)=([\w:/-_.]+)',  # NOTE(review): '/-_' inside the class is a character range from '/' to '_', probably not intended - confirm.

+ r'\1\3=\4'),  # NOTE(review): no closing ']' for the list follows this entry in the visible text.

+###############

+# Jinja filters

+###############

def delta_time(delta):
    """Format a duration in seconds as a short human-readable string.

    Hours and minutes are included only when they are non-zero.  Seconds are
    shown only for durations shorter than one hour.

    Args:
      delta: Duration in seconds (int or float).

    Returns:
      A string such as '2 hrs 5 mins', '1 min 30 secs' or '0 sec'.  The
      pieces are joined with single spaces, so there is never trailing
      whitespace.
    """
    hours = int(delta / 60 / 60)
    minutes = int((delta - hours * 3600) / 60)
    seconds = int(delta - (hours * 3600) - (minutes * 60))

    parts = []
    if hours:
        parts.append('%d %s' % (hours, 'hrs' if hours > 1 else 'hr'))
    if minutes:
        parts.append('%d %s' % (minutes, 'mins' if minutes > 1 else 'min'))
    if not hours:
        # Seconds are noise once the duration is an hour or longer.
        parts.append('%d %s' % (seconds, 'secs' if seconds > 1 else 'sec'))
    return ' '.join(parts)

+jinja_environment.filters['delta_time'] = delta_time

def time_since(timestamp):
    """Return the time elapsed since `timestamp`, formatted by delta_time.

    Args:
      timestamp: A point in time on the same scale as time.time().
    """
    return delta_time(time.time() - timestamp)

+jinja_environment.filters['time_since'] = time_since

def nl2br(value):
    """Insert an HTML line break before every newline in `value`.

    Args:
      value: The text to convert.

    Returns:
      `value` with each '\\n' replaced by '<br>\\n'.  The result is an
      ordinary string, not a markup-safe one.
    """
    # NOTE(review): the visible text replaced '\n' with ' \n', which adds no
    # line break at all; the <br> tag appears to have been lost - confirm.
    return value.replace('\n', '<br>\n')

+jinja_environment.filters['nl2br'] = nl2br

def cl_comment(value):
    """Add links to a change description and flatten it onto one line.

    Every http:// or https:// URL is wrapped in an anchor, every BUG=<number>
    gets a link to crbug.com, and finally each newline is replaced by a space.

    Args:
      value: The description text.

    Returns:
      The text with the anchors inserted and newlines replaced.
    """
    # NOTE(review): `value` is not HTML-escaped here; confirm that callers
    # escape it before the result is rendered as markup.
    # Stop the URL at the next whitespace so that text following it on the
    # same line is not swallowed into the link.
    value = re.sub(r'(https?://\S+)', r'<a href="\1">\1</a>', value)
    value = re.sub(
        r'BUG=(\d+)', r'BUG=<a href="http://crbug.com/\1">\1</a>', value)
    return value.replace('\n', ' ')

+jinja_environment.filters['cl_comment'] = cl_comment

+########

+# Models

+########

class BuildLogModel(db.Model):
    """Cache entry for a finished build log."""

    # URL the entry is looked up by.
    url = db.StringProperty()
    # Log contents as stored by the writer of the entry.
    data = db.BlobProperty()

class BuildLogResultModel(db.Model):
    """Cache entry for a finished build log that has already been parsed."""

    # URL the entry is looked up by.
    url = db.StringProperty()
    # Version string stored with the entry.
    version = db.StringProperty()
    # Parsed result as stored by the writer of the entry.
    data = db.BlobProperty()

+############

+# Decorators

+############

def render(template_filename):
    """Decorator factory: render a handler's return value with a template.

    The decorated handler method should return a dict, which is used as the
    template context; the rendered text is written to self.response.  When
    the handler returns None nothing is rendered.

    Args:
      template_filename: Name of the template to fetch from
        jinja_environment.

    Returns:
      A decorator for request-handler methods.
    """
    import functools  # Local import: not among the module-level imports.

    def _render(fn):
        @functools.wraps(fn)
        def wrapper(self, *args, **kwargs):
            results = fn(self, *args, **kwargs)
            if results is None:
                # The handler has nothing to render (for instance it only
                # redirected), and None is not a usable template context.
                return
            template = jinja_environment.get_template(template_filename)
            self.response.out.write(template.render(results))
        return wrapper
    return _render

def render_json(fn):
    """Decorator: write the handler's return value to the response as JSON.

    The decorated handler method is expected to return a JSON-serializable
    object (normally a dict).  It is serialized with json.dumps and written to
    self.response with a JSON content type.

    Args:
      fn: The request-handler method to wrap.

    Returns:
      The wrapping method; it returns None.
    """
    import functools  # Local import: not among the module-level imports.

    @functools.wraps(fn)
    def wrapper(self, *args, **kwargs):
        results = fn(self, *args, **kwargs)
        self.response.headers['Content-Type'] = 'application/json'
        self.response.out.write(json.dumps(results))
    return wrapper

def return_json_if_flag_is_set_else_render(template_filename):
    """Decorator factory: answer with JSON on request, else render a template.

    If the request carries a non-empty 'json' parameter, the handler's return
    value is written to the response as JSON.  Otherwise it is used as the
    context for `template_filename` and the rendered page is written instead.
    A None result is not rendered.

    Args:
      template_filename: Name of the template to fetch from
        jinja_environment.

    Returns:
      A decorator for request-handler methods.
    """
    import functools  # Local import: not among the module-level imports.

    def _render(fn):
        @functools.wraps(fn)
        def wrapper(self, *args, **kwargs):
            results = fn(self, *args, **kwargs)
            if self.request.get('json'):
                self.response.headers['Content-Type'] = 'application/json'
                self.response.out.write(json.dumps(results))
                return
            if results is None:
                # Nothing to render; None is not a usable template context.
                return
            template = jinja_environment.get_template(template_filename)
            self.response.out.write(template.render(results))
        return wrapper
    return _render

def login_required(fn):
    """Decorator: redirect anonymous users to the login page.

    When a user is signed in, the wrapped handler is called and its result
    returned.  Otherwise the request is redirected to a login URL that leads
    back to the current request URI, and None is returned.

    Args:
      fn: The request-handler method to wrap.
    """
    import functools  # Local import: not among the module-level imports.

    @functools.wraps(fn)
    def wrapper(self, *args, **kwargs):
        if not users.get_current_user():
            self.redirect(users.create_login_url(self.request.uri))
            return None
        return fn(self, *args, **kwargs)
    return wrapper

def google_login_required(fn):
    """Decorator: only run the handler for users with an @google.com e-mail.

    Anonymous users are redirected to the login page.  Signed-in users whose
    e-mail address has no '@' or a domain other than google.com receive a 403
    response with a short message.

    Args:
      fn: The request-handler method to wrap.
    """
    import functools  # Local import: not among the module-level imports.

    @functools.wraps(fn)
    def wrapper(self, *args, **kwargs):
        user = users.get_current_user()
        if not user:
            self.redirect(users.create_login_url(self.request.uri))
            return None
        email = user.email()
        # The domain is whatever follows the last '@'.
        _, at_sign, domain = email.rpartition('@')
        if at_sign and domain == 'google.com':
            return fn(self, *args, **kwargs)
        self.error(403)  # Unrecognized e-mail or unauthorized domain.
        # The message previously printed the user id while saying "email".
        self.response.out.write('unauthorized email %s' % email)
        return None
    return wrapper

+ return wrapper

def admin_required(fn):
    """Decorator: only run the handler when an administrator is signed in.

    Anonymous users are redirected to the login page.  Signed-in users who
    are not application administrators receive a 403 response.

    Args:
      fn: The request-handler method to wrap.
    """
    import functools  # Local import: not among the module-level imports.

    @functools.wraps(fn)
    def wrapper(self, *args, **kwargs):
        if not users.get_current_user():
            self.redirect(users.create_login_url(self.request.uri))
            return None
        if not users.is_current_user_admin():
            self.error(403)
            return None
        return fn(self, *args, **kwargs)
    return wrapper

def expect_request(*request_args):
    """Decorator factory: pass request parameters to a handler as kwargs.

    Each spec string has the form ``[(type)] name [as target]``:

      'name'                Pass request parameter "name" as name=...
      'time as timestamp'   Pass request parameter "time" as timestamp=...
      '(int) count'         Pass request parameter "count" converted by int()

    Supported types are int, float and str.  Empty values (what the request
    yields for an absent parameter) are passed through unconverted.  Values
    taken from the request override keyword arguments of the same name.

    Args:
      *request_args: Spec strings as described above.

    Returns:
      A decorator for request-handler methods.

    Raises:
      ValueError: When the decorator is created with a malformed spec or an
        unsupported type name.  A conversion failure at request time also
        surfaces as the ValueError raised by the converter.
    """
    import functools  # Local import: not among the module-level imports.

    converters = {'int': int, 'float': float, 'str': str}

    # Parse the specs once, up front, rather than on every request.
    specs = []
    for arg in request_args:
        arg_match = re.match(r'^(?:\((\w+)\))?\s*(\w+)(?: as (\w+))?$', arg)
        if not arg_match:
            raise ValueError('Incorrect format %s' % arg)
        type_name, name, target_name = arg_match.groups()
        if type_name is not None and type_name not in converters:
            raise ValueError('Unsupported type %s in %s' % (type_name, arg))
        specs.append((name, target_name or name, converters.get(type_name)))

    def _decorator(fn):
        @functools.wraps(fn)
        def wrapper(self, *args, **kwargs):
            for name, target_name, convert in specs:
                value = self.request.get(name)
                if convert is not None and value != '':
                    value = convert(value)
                kwargs[target_name] = value
            return fn(self, *args, **kwargs)
        return wrapper
    return _decorator

agable 2013/04/15 19:33:30 All these wrappers are really nice and general. On

Ryan Tseng 2013/04/17 22:53:48 Or I can do that now :)

+def emit(source, out):  # Split `out` into lines, tag each non-empty line with CSS class names, apply REPLACEMENTS; returns (source, lines).

+ # TODO(hinoka): This currently employs a "lookback" strategy

+ # (Find [PASS/FAIL], then goes back and marks all of the lines.)

+ # This should be switched to a "scan twice" strategy. 1st pass creates a

+ # Test Name -> PASS/FAIL/INCOMPLETE dictionary, and 2nd pass marks the lines.

+ title = source  # Alias of the input; only used in the return value.

agable 2013/04/15 19:33:30 Remove this, title is never used.

Ryan Tseng 2013/04/17 22:53:48 Done.

+ attr = []  # Class names given to every line of this source.

+ if source == 'header':

+ attr.append('text-info')

+ lines = []  # (abbreviated line or None, full line, class-name list) tuples.

+ current_test = None  # Name of the test whose output is being read, if any.

+ current_test_line = 0  # Index in `lines` where the current test's output starts.

+ for line in out.split('\n'):

+ if line:

+ test_match = re.search(r'\[ RUN \]\s*([^() ]*)\s*', line)

agable 2013/04/15 19:33:30 Here you're searching for [ RUN ], while earli

Ryan Tseng 2013/04/17 22:53:48 This set of regex is a bit special in that its not

+ line_attr = attr[:]  # Copy, so per-line additions do not leak into `attr`.

+ if test_match:

+ # This line is a "We're running a test" line.

+ current_test = test_match.group(1).strip()

+ current_test_line = len(lines)

+ elif '[ OK ]' in line or '[ PASSED ]' in line:

+ line_attr.append('text-success')

+ test_match = re.search(r'\[ OK \]\s*([^(), ]*)\s*', line)

+ if test_match:

+ finished_test = test_match.group(1).strip()

+ for line_item in lines[current_test_line:]:  # Look back over the lines recorded since the last RUN line.

+ if finished_test == current_test:

+ line_item[2].append('text-success')  # line_item[2] is that line's class list, modified in place.

+ else:

+ line_item[2].append('text-error')

+ current_test = None

+ elif '[ FAILED ]' in line:

+ line_attr.append('text-error')

+ test_match = re.search(r'\[ FAILED \]\s*([^(), ]*)\s*', line)

+ if test_match:

+ finished_test = test_match.group(1).strip()

+ for line_item in lines[current_test_line:]:

+ if finished_test == current_test:

+ line_item[2].append('text-error')

+ current_test = None

+ elif re.search(r'\[.{10}\]', line):  # Any other bracketed 10-character marker ends the current test.

+ current_test = None

+ elif re.search(r'\[\s*\d+/\d+\]\s*\d+\.\d+s\s+[\w/]+\.'

agable 2013/04/15 19:33:30 Document your regexes :)

Ryan Tseng 2013/04/17 22:53:48 Done.

+ r'[\w/]+\s+$[\d.s]+$', line):  # NOTE(review): '$' followed by more pattern can never match; presumably these were escaped parentheses - confirm.

+ current_test = None

+ line_attr.append('text-success')

+ elif 'aborting test' in line:

+ current_test = None

+ elif current_test:

+ line_attr.append('text-warning')  # Output of a test that has not finished yet.

+ if len(line) > 160:

agable 2013/04/15 19:33:30 Why 160?

Ryan Tseng 2013/04/17 22:53:48 That was arbitrary. I think I'll remove this and

+ line_abbr = line[:160]

+ line_abbr = line_abbr.replace(' ', ' ')  # NOTE(review): replaces a space with a space as written (a no-op); presumably the replacement was an HTML entity - confirm.

+ line = line.replace(' ', ' ')

+ if 'apply_issue' in line:

+ logging.warning(line)  # NOTE(review): looks like leftover debugging output - confirm.

+ for rep_from, rep_to in REPLACEMENTS:

+ line_abbr = re.sub(rep_from, rep_to, line_abbr)

+ line = re.sub(rep_from, rep_to, line)

+ lines.append((line_abbr, line, line_attr))

+ else:

+ line = line.replace(' ', ' ')

+ for rep_from, rep_to in REPLACEMENTS:

+ line = re.sub(rep_from, rep_to, line)

+ lines.append((None, line, line_attr))  # Short lines carry no abbreviated form.

agable 2013/04/15 19:33:30 Can pull this duplicated code (line.replace; for f

Ryan Tseng 2013/04/17 22:53:48 Removed line_abbr anyways.

+ return (title, lines)

agable 2013/04/15 19:33:30 Remove 'return title', it is identical to the inpu

Ryan Tseng 2013/04/17 22:53:48 Done.

class BuildStep(webapp2.RequestHandler):
    """Parses a build step page."""

    @render('step.html')
    @expect_request('url')
    def get(self, url):
        """Fetch a build's JSON description and return it as template context.

        Args:
          url: Build page URL whose path looks like
            /[p/]<master>/builders/<builder>/builds/<number>.

        Returns:
          The decoded JSON object with 'breadcrumbs' added, plus 'rietveld'
          when the build properties contain it.  An empty dict is returned
          after redirecting to /buildbot/ because `url` was missing or its
          path was not recognised.
        """
        if not url:
            self.redirect('/buildbot/')
            # Stop here: without the return the code below ran on an empty URL.
            return {}

        # Fetch the page.
        sch, netloc, path, _, _, _ = urlparse.urlparse(url)
        url_m = re.match(r'^/((p/)?)(.*)/builders/(.*)/builds/(\d+)$', path)
        if not url_m:
            self.redirect('/buildbot/')
            # Stop here: url_m is None, so .groups() below would raise.
            return {}

        prefix, _, master, builder, build_number = url_m.groups()
        json_url = '%s://%s/%s%s/json/builders/%s/builds/%s' % (
            sch, netloc, prefix, master, builder, build_number)
        s = urlfetch.fetch(json_url.replace(' ', '%20'),
                           method=urlfetch.GET, deadline=60).content
        logging.info(s)
        result = json.loads(s)

        # Add on some extraneous info.
        build_properties = dict(
            (name, value) for name, value, _ in result['properties'])
        if 'rietveld' in build_properties:
            result['rietveld'] = build_properties['rietveld']
        result['breadcrumbs'] = [
            ('Master %s' % master, '#'),
            ('Builder %s' % builder, '#'),
            ('Build Number %s' % build_number, '#'),
            ('Slave %s' % result['slave'], '#'),
        ]
        return result

+class MainPage(webapp2.RequestHandler):

+ """Parses a buildlog page."""

+ @render('main.html')

+ @expect_request('url')

agable 2013/04/15 19:33:30 Having a *required* url parameter is kinda weird.

Ryan Tseng 2013/04/17 22:53:48 Done. MainPage now just parses the url and redire

+ def get(self, url):  # Load the log at `url` (from cache or over HTTP), parse it, and return the template context.

agable 2013/04/15 19:33:30 I'd reorder the steps this method performs for bet

Ryan Tseng 2013/04/17 22:53:48 Refactored to just do #1. The rest has also been

+ if not url:

+ return {}  # No URL given: render the page with an empty context.

+ # Redirect the page if we detect a different type of URL.

+ sch, netloc, path, _, _, _ = urlparse.urlparse(url)

+ logging.info(path)

+ if re.match(r'^/((p/)?)(.*)/builders/(.*)/builds/(\d+)$', path):

+ self.redirect('/buildbot/step?url=%s' % url)  # NOTE(review): url is interpolated into the query string without quoting - confirm this is intended.

+ return {}

+ buildlog_query = BuildLogModel.all().filter('url =', url)

+ buildlog = buildlog_query.get()  # Cached raw log for this URL, or None.

agable 2013/04/15 19:33:30 377 and 378 can be one line.

Ryan Tseng 2013/04/17 22:53:48 Done.

+ log_fetch_start = time.time()

+ if buildlog:

+ s = zlib.decompress(buildlog.data)

+ else:

+ s = urlfetch.fetch(url, method=urlfetch.GET, deadline=60).content

+ log_fetch_time = time.time() - log_fetch_start

+ all_output = re.findall(r'(.*?)',  # NOTE(review): a one-group pattern cannot yield the (source, output) pairs unpacked below; the pattern looks truncated - confirm against the original file.

agable 2013/04/15 19:33:30 Don't bother performing this regex unless the cach

Ryan Tseng 2013/04/17 22:53:48 Done.

+ s, re.S)

+ cached_result = BuildLogResultModel.all().filter(

+ 'url =', url).filter('version =', VERSION_ID).get()  # Parsed results are looked up by URL and app version.

+ parse_time_start = time.time()

+ if cached_result:

+ result_output = json.loads(zlib.decompress(cached_result.data))

+ else:

+ result_output = []

+ current_source = None

+ current_string = ''

+ for source, output in all_output:  # Merge consecutive chunks from the same source before handing them to emit().

+ if source == current_source:

+ current_string += output

+ continue

+ else:

+ # We hit a new source, we want to emit whatever we had left and

+ # start anew.

+ if current_string:

+ result_output.append(emit(current_source, current_string))

+ current_string = output

+ current_source = source

+ if current_string:

+ result_output.append(emit(current_source, current_string))

+ compressed_result = zlib.compress(json.dumps(result_output))

+ if len(compressed_result) < 1000 * 1000:  # Only results under 1,000,000 bytes are stored; presumably a datastore size limit - confirm.

agable 2013/04/15 19:33:30 Use 10**6

Ryan Tseng 2013/04/17 22:53:48 Done.

+ cached_result = BuildLogResultModel(

+ url=url, version=VERSION_ID, data=compressed_result)

+ cached_result.put()

+ url_re = r'/[p]/([\w.]+)/builders/(\w+)/builds/(\w+)/steps/(\w+)/logs/.*'

+ master_name, builder_name, build_number, step = re.search(

+ url_re, url).groups()  # NOTE(review): re.search returns None for a URL that does not match, and .groups() would then raise AttributeError.

+ ret_code_m = re.search('program finished with exit code (-?\d+)', s)  # NOTE(review): non-raw string containing \d.

+ if ret_code_m:

+ ret_code = int(ret_code_m.group(1))

+ if ret_code == 0:

+ status = 'OK'

+ else:

+ status = 'ERROR'

+ else:

+ status = 'RUNNING'  # No exit-code line found in the log.

+ ret_code = None

+ if ret_code is not None and not buildlog:

+ # Cache this build log if not already.

+ compressed_data = zlib.compress(s)

+ if len(compressed_data) < 1000 * 1000:

+ buildlog = BuildLogModel(url=url, data=compressed_data)

+ buildlog.put()

+ parse_time = time.time() - parse_time_start

+ return {

+ 'output': result_output,

+ 'url': url,  # NOTE(review): the 'url' key appears again further down in this literal with the same value.

+ 'name': step,

+ 'breadcrumbs': [

+ ('Master %s' % master_name,

+ 'http://build.chromium.org/p/%s/waterfall' % master_name),

+ ('Builder %s' % builder_name,

+ 'http://build.chromium.org/p/%s/builders/%s' %

+ (master_name, builder_name)),

+ ('Build Number %s ' % build_number,

+ 'http://build.chromium.org/p/%s/builders/%s/builds/%s' %

+ (master_name, builder_name, build_number)),

+ ('Step %s' % step, url)

+ ],

+ 'status': status,

+ 'ret_code': ret_code,

+ 'log_fetch_time': log_fetch_time,

+ 'parse_time': parse_time,

+ 'compressed_size': len(buildlog.data) if buildlog else -1,  # -1 when no raw-log cache entry exists.

+ 'compressed_report': len(cached_result.data) if cached_result else -1,  # -1 when no parsed-result cache entry exists.

+ 'url': url,

+ 'debug': self.request.get('debug'),

+ 'size': len(s)

+ }

agable 2013/04/15 19:33:30 Could cache the compressed version of this whole j

Ryan Tseng 2013/04/17 22:53:48 done :) (That's what line 388/412 is) Well, it cac

def webapp_add_wsgi_middleware(app):
    """Wrap `app` in the appstats recording middleware and return the result."""
    from google.appengine.ext.appstats import recording
    return recording.appstats_wsgi_middleware(app)

# Enable debug mode (tracebacks shown in the browser) only when
# SERVER_SOFTWARE starts with 'Development'; it was previously always on.
# NOTE(review): assumes the local development server reports itself that way
# - confirm for this runtime.
_DEBUG = os.environ.get('SERVER_SOFTWARE', '').startswith('Development')

# URL routes of the application, wrapped in the WSGI middleware.
app = webapp2.WSGIApplication([
    ('/buildbot/', MainPage),
    ('/buildbot/step/?', BuildStep),
], debug=_DEBUG)
app = webapp_add_wsgi_middleware(app)

« no previous file with comments | « app.yaml ('k') | static/css/bootstrap.css » ('j') | static/css/bootstrap-responsive.css » ('J')