Chromium Code Reviews| Index: buildlogparse.py |
| diff --git a/buildlogparse.py b/buildlogparse.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..a8d93873f49a21fb0a106df06b8c5cf76bd32adb |
| --- /dev/null |
| +++ b/buildlogparse.py |
| @@ -0,0 +1,476 @@ |
| + |
| + |
import base64
import cStringIO
import datetime
from datetime import timedelta
import functools
import json
import logging
import os
import Queue
import re
import time
import urllib
import urlparse
import zlib

import jinja2
import webapp2
from google.appengine.api import files
from google.appengine.api import mail
from google.appengine.api import memcache
from google.appengine.api import urlfetch
from google.appengine.api import users
from google.appengine.ext import db
from google.appengine.ext import deferred
|
agable
2013/04/15 19:33:30
Please clean up the imports so that they include only what you actually use.
Ryan Tseng
2013/04/17 22:53:48
Done.
Ryan Tseng
2013/04/17 22:53:48
Done.
|
| + |
# Deployment version string read from the environment; MainPage stores it
# alongside each parsed log so results parsed by other versions are ignored.
VERSION_ID = os.environ['CURRENT_VERSION_ID']

# Templates are loaded from the "templates" directory next to this module.
_TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), 'templates')
jinja_environment = jinja2.Environment(
    loader=jinja2.FileSystemLoader(_TEMPLATE_DIR),
    autoescape=True,
    extensions=['jinja2.ext.autoescape'])

# Prefer the HTTP_HOST value when it is set and non-empty; otherwise fall
# back to SERVER_NAME.
APP_URL = os.environ.get('HTTP_HOST') or os.environ['SERVER_NAME']
| + |
# Ordered (pattern, replacement) pairs applied with re.sub to every log line
# to decorate it with HTML links, labels and badges.
#
# NOTE(review): this listing was recovered from a whitespace-collapsed dump,
# so runs of spaces (or HTML entities) inside the bracketed markers below,
# e.g. "[ RUN ]", may not match the original literals -- confirm against the
# real log format before relying on them.
REPLACEMENTS = [
    # Find ../../../scripts/.../*.py scripts and add links to them.
    # Non-greedy so that two script paths on one line become two links.
    # NOTE(review): the pattern matches three "../" levels but the link text
    # shows two -- confirm which one is intended.
    (r'\.\./\.\./\.\./scripts/(.*?)\.py',
     r'<a href="https://code.google.com/p/chromium/codesearch#chromium/tools/'
     r'build/scripts/\1.py">../../scripts/\1.py</a>'),

    # Find ../../chrome/.../*.cc files and add links to them.
    # Non-greedy for the same reason as above.
    (r'\.\./\.\./chrome/(.*?)\.cc:(\d+)',
     r'<a href="https://code.google.com/p/chromium/codesearch#chromium/src/'
     r'chrome/\1.cc&l=\2">../../chrome/\1.cc:\2</a>'),

    # Search for codereview issue numbers, and add codereview links.
    (r'apply_issue(.*)-i (\d{8})(.*)-s (.*)',
     r'apply_issue\1-i <a href="\4/\2">\2</a>\3-s \4'),

    # Add green labels to PASSED items.
    (r'\[( PASSED )\]',
     r'<span class="label label-success">[\1]</span>'),

    # Add red labels to FAILED items.
    (r'\[( FAILED )\]',
     r'<span class="label label-important">[\1]</span>'),

    # Add black labels to RUN items.
    (r'\[( RUN )\]',
     r'<span class="label label-inverse">[\1]</span>'),

    # Add badges to running tests.
    (r'\[(( )*\d+/\d+)\](( )+)(\d+\.\d+s) '
     r'([\w/]+\.[\w/]+) \(([\d.s]+)\)',
     r'<span class="badge badge-success">\1</span>\3<span class="badge">'
     r'\5</span> \6 <span class="badge">\7</span>'),

    # Add gray labels to [==========] blocks.
    (r'\[([-=]{10})\]',
     r'<span class="label">[\1]</span>'),

    # Find .cc and .h files and add codesite links to them.
    (r'\.\./\.\./([\w/-]+)\.(cc|h): ',
     r'<a href="https://code.google.com/p/chromium/codesearch#chromium/src/'
     r'\1.\2">../../\1.\2</a>: '),

    # Find source files with line numbers and add links to them.
    (r'\.\./\.\./([\w/-]+)\.(cc|h):(\d+): ',
     r'<a href="https://code.google.com/p/chromium/codesearch#chromium/src/'
     r'\1.\2&l=\3">../../\1.\2:\3</a>: '),

    # Add badges to compiling items.
    (r'\[(\d+/\d+)\] (CXX|AR|STAMP|CC|ACTION|RULE|COPY)',
     r'<span class="badge badge-info">\1</span> '
     r'<span class="badge">\2</span>'),

    # Bold the LHS of A=B text.
    # The hyphen sits last in the class so it is a literal "-"; written as
    # "/-_" it formed a character range that excluded "-" itself.
    (r'^(( )*)(\w+)=([\w:/_.-]+)',
     r'\1<strong>\3</strong>=\4'),
]
| + |
| +############### |
| +# Jinja filters |
| +############### |
| + |
def delta_time(delta):
  """Format a duration given in seconds as a short human-readable string.

  Hours and minutes are shown only when non-zero. Seconds are shown only
  when the duration is under one hour, so a zero duration yields '0 sec'.
  Units are pluralised when the value is greater than one.

  Args:
    delta: Duration in seconds (int or float). Fractions are truncated and
        negative values are treated as zero.

  Returns:
    A string such as '2 hrs 5 mins' or '1 min 1 sec', with no leading or
    trailing whitespace.
  """
  total_seconds = max(int(delta), 0)
  hours, remainder = divmod(total_seconds, 3600)
  minutes, seconds = divmod(remainder, 60)

  parts = []
  if hours:
    parts.append('%d %s' % (hours, 'hrs' if hours > 1 else 'hr'))
  if minutes:
    parts.append('%d %s' % (minutes, 'mins' if minutes > 1 else 'min'))
  if not hours:
    # Second-level precision is noise once the duration reaches an hour.
    parts.append('%d %s' % (seconds, 'secs' if seconds > 1 else 'sec'))
  return ' '.join(parts)
| +jinja_environment.filters['delta_time'] = delta_time |
| + |
def time_since(timestamp):
  """Describe how long ago timestamp (seconds since the epoch) occurred.

  The elapsed time is measured against time.time() and formatted by
  delta_time.
  """
  return delta_time(time.time() - timestamp)
| +jinja_environment.filters['time_since'] = time_since |
| + |
def nl2br(value):
  """Insert an HTML <br> before every newline in value."""
  pieces = value.split('\n')
  return '<br>\n'.join(pieces)
| +jinja_environment.filters['nl2br'] = nl2br |
| + |
def cl_comment(value):
  """Turn a change description into HTML.

  http:// and https:// addresses become links, BUG=#### becomes a link to
  crbug.com, and newlines become <br>.

  Args:
    value: The description text.

  Returns:
    The text with the HTML markup inserted. The input itself is not
    HTML-escaped here.
  """
  # A URL ends at the first whitespace; matching to the end of the line
  # would pull any trailing prose into the link target.
  value = re.sub(r'(https?://\S+)', r'<a href="\1">\1</a>', value)
  value = re.sub(
      r'BUG=(\d+)', r'BUG=<a href="http://crbug.com/\1">\1</a>', value)
  return value.replace('\n', '<br>')
| +jinja_environment.filters['cl_comment'] = cl_comment |
| + |
| +######## |
| +# Models |
| +######## |
| + |
class BuildLogModel(db.Model):
  """Used for caching finished build logs."""

  # Address the log was fetched from; MainPage looks entries up by it.
  url = db.StringProperty()
  # Log contents; MainPage stores them zlib-compressed.
  data = db.BlobProperty()
| + |
class BuildLogResultModel(db.Model):
  """Used for caching finished and parsed build logs."""

  # Address of the log the parsed result belongs to.
  url = db.StringProperty()
  # Value of VERSION_ID at the time the result was stored.
  version = db.StringProperty()
  # Parsed output; MainPage stores it as zlib-compressed JSON.
  data = db.BlobProperty()
| + |
| + |
| +############ |
| +# Decorators |
| +############ |
def render(template_filename):
  """Decorator factory that renders a handler's result with a template.

  The decorated handler method is expected to return a dict. That dict is
  used as the context for the Jinja template named template_filename, and
  the rendered text is written to self.response.out.

  Args:
    template_filename: Name of the template to load from jinja_environment.

  Returns:
    A decorator for request handler methods.
  """
  def _render(fn):
    @functools.wraps(fn)
    def wrapper(self, *args, **kwargs):
      results = fn(self, *args, **kwargs)
      template = jinja_environment.get_template(template_filename)
      self.response.out.write(template.render(results))
    return wrapper
  return _render
| + |
def render_json(fn):
  """Decorator that writes a handler's result to the response as JSON.

  The decorated handler method is expected to return a dict (or any other
  value json.dumps accepts). The serialised text is written to
  self.response.out.
  """
  @functools.wraps(fn)
  def wrapper(self, *args, **kwargs):
    results = fn(self, *args, **kwargs)
    self.response.out.write(json.dumps(results))
  return wrapper
| + |
def return_json_if_flag_is_set_else_render(template_filename):
  """Decorator factory that emits either JSON or a rendered template.

  If the request carries a non-empty 'json' parameter, the handler's result
  is written to the response as JSON. Otherwise it is used as the context
  for the Jinja template named template_filename.

  Args:
    template_filename: Name of the template to load from jinja_environment.

  Returns:
    A decorator for request handler methods.
  """
  def _render(fn):
    @functools.wraps(fn)
    def wrapper(self, *args, **kwargs):
      results = fn(self, *args, **kwargs)
      if self.request.get('json'):
        body = json.dumps(results)
      else:
        template = jinja_environment.get_template(template_filename)
        body = template.render(results)
      self.response.out.write(body)
    return wrapper
  return _render
| + |
def login_required(fn):
  """Decorator that redirects anonymous users to a login page.

  When a user is signed in, the handler runs normally and its return value
  is passed through. Otherwise the response is a redirect to a login URL
  that leads back to the requested address.
  """
  @functools.wraps(fn)
  def wrapper(self, *args, **kwargs):
    if not users.get_current_user():
      self.redirect(users.create_login_url(self.request.uri))
      return
    return fn(self, *args, **kwargs)
  return wrapper
| + |
def google_login_required(fn):
  """Decorator that restricts a handler to @google.com accounts.

  Anonymous users are redirected to a login page. Signed-in users whose
  email address is not in the google.com domain receive a 403 response with
  a short message; everyone else reaches the handler.
  """
  @functools.wraps(fn)
  def wrapper(self, *args, **kwargs):
    user = users.get_current_user()
    if not user:
      self.redirect(users.create_login_url(self.request.uri))
      return
    if user.email().endswith('@google.com'):
      return fn(self, *args, **kwargs)
    self.error(403)  # Unrecognized email or unauthorized domain.
    self.response.out.write('unauthorized email %s' % user.user_id())
  return wrapper
| + |
def admin_required(fn):
  """Decorator that restricts a handler to application admins.

  Anonymous users are redirected to a login page. Signed-in users who are
  not admins receive a 403 response. Admins reach the handler and its
  return value is passed through.
  """
  @functools.wraps(fn)
  def wrapper(self, *args, **kwargs):
    if not users.get_current_user():
      self.redirect(users.create_login_url(self.request.uri))
      return
    if not users.is_current_user_admin():
      self.error(403)
      return
    return fn(self, *args, **kwargs)
  return wrapper
| + |
def expect_request(*request_args):
  """Decorator factory that feeds request parameters to a handler as kwargs.

  Each spec names a request parameter and, optionally, the keyword argument
  it should be passed as. Examples:
    name               (pass request parameter "name" as name=...)
    time as timestamp  (pass request parameter "time" as timestamp=...)

  A leading "(type)" prefix, e.g. "(int) count", is accepted by the parser
  but is currently ignored: values are always passed through exactly as
  self.request.get returns them.

  Specs are parsed once, when the decorator is created, so a malformed spec
  fails immediately rather than on the first request.

  Args:
    *request_args: Spec strings in the format described above.

  Returns:
    A decorator for request handler methods.

  Raises:
    Exception: If a spec does not match the expected format.
  """
  spec_re = re.compile(r'^(\((\w+)\))?\s*(\w+)( as (\w+))?$')
  bindings = []  # (request parameter name, keyword argument name) pairs.
  for arg in request_args:
    arg_match = spec_re.match(arg)
    if not arg_match:
      raise Exception('Incorrect format %s' % arg)
    name = arg_match.group(3)
    bindings.append((name, arg_match.group(5) or name))

  def _decorator(fn):
    @functools.wraps(fn)
    def wrapper(self, *args, **kwargs):
      # Request values take precedence over same-named caller kwargs.
      for name, target_name in bindings:
        kwargs[target_name] = self.request.get(name)
      return fn(self, *args, **kwargs)
    return wrapper
  return _decorator
|
agable
2013/04/15 19:33:30
All these wrappers are really nice and general. On
Ryan Tseng
2013/04/17 22:53:48
Or I can do that now :)
|
| + |
# Lines longer than this many characters also get a truncated rendering.
_MAX_LINE_LENGTH = 160


def _apply_replacements(text):
  """Run every REPLACEMENTS substitution over text, in order."""
  for rep_from, rep_to in REPLACEMENTS:
    text = re.sub(rep_from, rep_to, text)
  return text


def emit(source, out):
  """Annotate one block of log output line by line.

  Every non-empty line of out is tagged with CSS class names describing its
  role in the test output and decorated with the REPLACEMENTS markup.

  NOTE(review): the block nesting and the whitespace inside the string
  literals below were reconstructed from a whitespace-collapsed listing --
  confirm both against the original file.

  Args:
    source: Name of the stream the text came from. Lines from 'header'
        additionally carry the 'text-info' class.
    out: The raw text of the block.

  Returns:
    A (source, lines) tuple. lines is a list of
    (line_abbr, line, line_attr) tuples, where line_abbr is the decorated
    first _MAX_LINE_LENGTH characters of the line (None when the line is
    not longer than that), line is the decorated full line, and line_attr
    is the list of CSS class names for the line.
  """
  # TODO(hinoka): This currently employs a "lookback" strategy
  # (Find [PASS/FAIL], then goes back and marks all of the lines.)
  # This should be switched to a "scan twice" strategy. 1st pass creates a
  # Test Name -> PASS/FAIL/INCOMPLETE dictionary, and 2nd pass marks the lines.
  attr = []
  if source == 'header':
    attr.append('text-info')
  lines = []
  current_test = None
  current_test_line = 0
  for line in out.split('\n'):
    if not line:
      continue
    line_attr = attr[:]
    test_match = re.search(r'\[ RUN \]\s*([^() ]*)\s*', line)
    if test_match:
      # This line is a "We're running a test" line.
      current_test = test_match.group(1).strip()
      current_test_line = len(lines)
    elif '[ OK ]' in line or '[ PASSED ]' in line:
      line_attr.append('text-success')
      test_match = re.search(r'\[ OK \]\s*([^(), ]*)\s*', line)
      if test_match:
        # Look back: mark everything since the test started. A finished
        # test other than the one being tracked marks those lines as errors.
        if test_match.group(1).strip() == current_test:
          verdict = 'text-success'
        else:
          verdict = 'text-error'
        for line_item in lines[current_test_line:]:
          line_item[2].append(verdict)
      current_test = None
    elif '[ FAILED ]' in line:
      line_attr.append('text-error')
      test_match = re.search(r'\[ FAILED \]\s*([^(), ]*)\s*', line)
      if test_match and test_match.group(1).strip() == current_test:
        for line_item in lines[current_test_line:]:
          line_item[2].append('text-error')
      current_test = None
    elif re.search(r'\[.{10}\]', line):
      # Any other ten-character bracketed marker ends the current test.
      current_test = None
    elif re.search(r'\[\s*\d+/\d+\]\s*\d+\.\d+s\s+[\w/]+\.'
                   r'[\w/]+\s+\([\d.s]+\)', line):
      # "[n/m] 1.23s Suite.Test (1.2s)" style progress line.
      current_test = None
      line_attr.append('text-success')
    elif 'aborting test' in line:
      current_test = None
    elif current_test:
      # Output printed while a test is still running.
      line_attr.append('text-warning')

    is_long = len(line) > _MAX_LINE_LENGTH
    line_abbr = None
    if is_long:
      line_abbr = line[:_MAX_LINE_LENGTH]
      # NOTE(review): as listed this swaps a space for a space; presumably
      # the replacement was a non-breaking space entity -- confirm.
      line_abbr = line_abbr.replace(' ', ' ')
    line = line.replace(' ', ' ')
    if is_long and 'apply_issue' in line:
      logging.warning(line)
    if is_long:
      line_abbr = _apply_replacements(line_abbr)
    lines.append((line_abbr, _apply_replacements(line), line_attr))
  return (source, lines)
|
agable
2013/04/15 19:33:30
Remove 'return title', it is identical to the inpu
Ryan Tseng
2013/04/17 22:53:48
Done.
|
| + |
| + |
class BuildStep(webapp2.RequestHandler):
  """Parses a build step page."""

  @render('step.html')
  @expect_request('url')
  def get(self, url):
    """Fetch the JSON description of a build and prepare it for rendering.

    Args:
      url: Address of a build page whose path looks like
          /[p/]<master>/builders/<builder>/builds/<number>.

    Returns:
      The decoded build JSON, extended with 'breadcrumbs' and, when the
      build properties contain it, 'rietveld'. A missing or unrecognised
      url results in a redirect to /buildbot/ instead.
    """
    if not url:
      # abort=True raises, so nothing below runs and no template is rendered.
      return self.redirect('/buildbot/', abort=True)

    # Fetch the page.
    # NOTE(review): url comes straight from the request and the server
    # fetches it -- consider restricting the allowed hosts.
    sch, netloc, path, _, _, _ = urlparse.urlparse(url)
    url_m = re.match(r'^/((p/)?)(.*)/builders/(.*)/builds/(\d+)$', path)
    if not url_m:
      return self.redirect('/buildbot/', abort=True)
    prefix, _, master, builder, build_number = url_m.groups()
    json_url = '%s://%s/%s%s/json/builders/%s/builds/%s' % (
        sch, netloc, prefix, master, builder, build_number)
    s = urlfetch.fetch(json_url.replace(' ', '%20'),
                       method=urlfetch.GET, deadline=60).content
    logging.info(s)

    result = json.loads(s)

    # Add on some extraneous info.
    build_properties = dict((name, value) for name, value, _
                            in result['properties'])

    if 'rietveld' in build_properties:
      result['rietveld'] = build_properties['rietveld']
    result['breadcrumbs'] = [
        ('Master %s' % master, '#'),
        ('Builder %s' % builder, '#'),
        ('Build Number %s' % build_number, '#'),
        ('Slave %s' % result['slave'], '#')
    ]
    return result
| + |
| + |
class MainPage(webapp2.RequestHandler):
  """Parses a buildlog page."""

  # Compressed blobs of this many bytes or more are not written to the
  # datastore.
  _MAX_CACHED_BYTES = 1000 * 1000

  @staticmethod
  def _parse_log(raw_log):
    """Split the log HTML into header/stdout runs and annotate each run.

    Consecutive spans of the same class are concatenated before being
    handed to emit; runs whose combined text is empty are skipped.
    """
    all_output = re.findall(r'<span class="(header|stdout)">(.*?)</span>',
                            raw_log, re.S)
    result_output = []
    current_source = None
    chunks = []
    for source, output in all_output:
      if source != current_source:
        # We hit a new source, we want to emit whatever we had left and
        # start anew.
        text = ''.join(chunks)
        if text:
          result_output.append(emit(current_source, text))
        chunks = []
        current_source = source
      chunks.append(output)
    text = ''.join(chunks)
    if text:
      result_output.append(emit(current_source, text))
    return result_output

  @render('main.html')
  @expect_request('url')
  def get(self, url):
    """Fetch, parse and describe the build log found at url.

    Finished logs, and their parsed form, are cached in the datastore. The
    parsed form is keyed by VERSION_ID as well as by url.

    Args:
      url: Address of a step log. An empty value renders the page with no
          context, and a build page address is redirected to
          /buildbot/step.

    Returns:
      The template context dict ({} for the empty, redirect and
      unrecognised-url cases).
    """
    if not url:
      return {}

    # Redirect the page if we detect a different type of URL.
    _, _, path, _, _, _ = urlparse.urlparse(url)
    logging.info(path)
    if re.match(r'^/((p/)?)(.*)/builders/(.*)/builds/(\d+)$', path):
      # Quote the value so that characters such as '&', '#' or spaces in
      # url survive the trip through the query string.
      self.redirect('/buildbot/step?url=%s' % urllib.quote(url, safe=''))
      return {}

    # Validate the address before doing any fetching or caching.
    url_match = re.search(
        r'/p/([\w.]+)/builders/(\w+)/builds/(\w+)/steps/(\w+)/logs/.*', url)
    if not url_match:
      self.error(400)
      return {}
    master_name, builder_name, build_number, step = url_match.groups()

    # NOTE(review): url comes straight from the request and the server
    # fetches it -- consider restricting the allowed hosts.
    buildlog = BuildLogModel.all().filter('url =', url).get()
    log_fetch_start = time.time()
    if buildlog:
      s = zlib.decompress(buildlog.data)
    else:
      s = urlfetch.fetch(url, method=urlfetch.GET, deadline=60).content
    log_fetch_time = time.time() - log_fetch_start

    ret_code_m = re.search(r'program finished with exit code (-?\d+)', s)
    if ret_code_m:
      ret_code = int(ret_code_m.group(1))
      status = 'OK' if ret_code == 0 else 'ERROR'
    else:
      status = 'RUNNING'
      ret_code = None
    finished = ret_code is not None

    cached_result = BuildLogResultModel.all().filter(
        'url =', url).filter('version =', VERSION_ID).get()
    parse_time_start = time.time()
    if cached_result:
      result_output = json.loads(zlib.decompress(cached_result.data))
    else:
      result_output = self._parse_log(s)
      if finished:
        # Only a finished log has a final parsed form; caching the result
        # of a running build would serve stale output from then on.
        compressed_result = zlib.compress(json.dumps(result_output))
        if len(compressed_result) < self._MAX_CACHED_BYTES:
          cached_result = BuildLogResultModel(
              url=url, version=VERSION_ID, data=compressed_result)
          cached_result.put()

    if finished and not buildlog:
      # Cache this build log if not already.
      compressed_data = zlib.compress(s)
      if len(compressed_data) < self._MAX_CACHED_BYTES:
        buildlog = BuildLogModel(url=url, data=compressed_data)
        buildlog.put()
    parse_time = time.time() - parse_time_start

    return {
        'output': result_output,
        'url': url,
        'name': step,
        'breadcrumbs': [
            ('Master %s' % master_name,
             'http://build.chromium.org/p/%s/waterfall' % master_name),
            ('Builder %s' % builder_name,
             'http://build.chromium.org/p/%s/builders/%s' %
             (master_name, builder_name)),
            ('Build Number %s ' % build_number,
             'http://build.chromium.org/p/%s/builders/%s/builds/%s' %
             (master_name, builder_name, build_number)),
            ('Step %s' % step, url)
        ],
        'status': status,
        'ret_code': ret_code,
        'log_fetch_time': log_fetch_time,
        'parse_time': parse_time,
        'compressed_size': len(buildlog.data) if buildlog else -1,
        'compressed_report': len(cached_result.data) if cached_result else -1,
        'debug': self.request.get('debug'),
        'size': len(s)
    }
|
agable
2013/04/15 19:33:30
Could cache the compressed version of this whole j
Ryan Tseng
2013/04/17 22:53:48
done :) (That's what line 388/412 is)
Well, it cac
|
| + |
| + |
def webapp_add_wsgi_middleware(app):
  """Return app wrapped in the appstats recording WSGI middleware."""
  # Imported here rather than at module level, as in the original.
  from google.appengine.ext.appstats import recording
  return recording.appstats_wsgi_middleware(app)
| + |
| + |
# URL patterns and the handlers that serve them.
_ROUTES = [
    ('/buildbot/', MainPage),
    ('/buildbot/step/?', BuildStep),
]

# The WSGI application, wrapped in the appstats middleware.
# NOTE(review): debug=True is set unconditionally -- confirm this is
# intended outside of development.
app = webapp_add_wsgi_middleware(
    webapp2.WSGIApplication(_ROUTES, debug=True))