Index: buildlogparse.py |
diff --git a/buildlogparse.py b/buildlogparse.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..a8d93873f49a21fb0a106df06b8c5cf76bd32adb |
--- /dev/null |
+++ b/buildlogparse.py |
@@ -0,0 +1,476 @@ |
+ |
+ |
import base64
import cStringIO
import datetime
import functools
import json
import logging
import os
import Queue
import re
import time
import urllib
import urlparse
import zlib
from datetime import timedelta

import jinja2
import webapp2
from google.appengine.api import files
from google.appengine.api import mail
from google.appengine.api import memcache
from google.appengine.api import urlfetch
from google.appengine.api import users
from google.appengine.ext import db
from google.appengine.ext import deferred
agable
2013/04/15 19:33:30
Please cleanup imports to be only what you actuall
Ryan Tseng
2013/04/17 22:53:48
Done.
Ryan Tseng
2013/04/17 22:53:48
Done.
|
+ |
# Version string of the running deployment, read from the environment.
VERSION_ID = os.environ['CURRENT_VERSION_ID']

# Templates live in the "templates" directory next to this module.
_TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), 'templates')
jinja_environment = jinja2.Environment(
    loader=jinja2.FileSystemLoader(_TEMPLATE_DIR),
    autoescape=True,
    extensions=['jinja2.ext.autoescape'])

# Use HTTP_HOST when it is set and non-empty, otherwise SERVER_NAME.
APP_URL = os.environ.get('HTTP_HOST') or os.environ['SERVER_NAME']
+ |
# Ordered (pattern, replacement) pairs applied with re.sub() to every log
# line.  Order matters: later patterns see the output of earlier ones.
REPLACEMENTS = [
    # Find ../../scripts/.../*.py scripts and add links to them.
    # NOTE(review): the pattern matches three "../" levels while the link
    # text shows two -- confirm which one is intended.
    (r'\.\./\.\./\.\./scripts/(.*)\.py',
     r'<a href="https://code.google.com/p/chromium/codesearch#chromium/tools/'
     r'build/scripts/\1.py">../../scripts/\1.py</a>'),

    # Find ../../chrome/.../*.cc files and add links to them.
    (r'\.\./\.\./chrome/(.*)\.cc:(\d+)',
     r'<a href="https://code.google.com/p/chromium/codesearch#chromium/src/'
     r'chrome/\1.cc&l=\2">../../chrome/\1.cc:\2</a>'),

    # Search for codereview issue numbers, and add codereview links.
    (r'apply_issue(.*)-i (\d{8})(.*)-s (.*)',
     r'apply_issue\1-i <a href="\4/\2">\2</a>\3-s \4'),

    # Add green labels to PASSED items.
    (r'\[( PASSED )\]',
     r'<span class="label label-success">[\1]</span>'),

    # Add red labels to FAILED items.
    (r'\[( FAILED )\]',
     r'<span class="label label-important">[\1]</span>'),

    # Add black labels to RUN items.
    (r'\[( RUN )\]',
     r'<span class="label label-inverse">[\1]</span>'),

    # Add badges to running tests.
    (r'\[(( )*\d+/\d+)\](( )+)(\d+\.\d+s) '
     r'([\w/]+\.[\w/]+) \(([\d.s]+)\)',
     r'<span class="badge badge-success">\1</span>\3<span class="badge">'
     r'\5</span> \6 <span class="badge">\7</span>'),

    # Add gray labels to [==========] blocks.
    (r'\[([-=]{10})\]',
     r'<span class="label">[\1]</span>'),

    # Find .cc and .h files and add codesite links to them.
    (r'\.\./\.\./([\w/-]+)\.(cc|h): ',
     r'<a href="https://code.google.com/p/chromium/codesearch#chromium/src/'
     r'\1.\2">../../\1.\2</a>: '),

    # Find source files with line numbers and add links to them.
    (r'\.\./\.\./([\w/-]+)\.(cc|h):(\d+): ',
     r'<a href="https://code.google.com/p/chromium/codesearch#chromium/src/'
     r'\1.\2&l=\3">../../\1.\2:\3</a>: '),

    # Add badges to compiling items.
    (r'\[(\d+/\d+)\] (CXX|AR|STAMP|CC|ACTION|RULE|COPY)',
     r'<span class="badge badge-info">\1</span> '
     r'<span class="badge">\2</span>'),

    # Bold the LHS of A=B text.  The hyphen sits last in the character class
    # so it is a literal '-' rather than forming a '/'-to-'_' range.
    (r'^(( )*)(\w+)=([\w:/._-]+)',
     r'\1<strong>\3</strong>=\4'),
]
+ |
+############### |
+# Jinja filters |
+############### |
+ |
def delta_time(delta):
    """Format a duration given in seconds as short human-readable text.

    Hours and minutes are shown only when non-zero.  Seconds are shown only
    when the duration is under an hour (including a zero count).  Units are
    pluralised when the count is greater than one.

    Args:
        delta: Duration in seconds (int or float).  Fractions are truncated
            and negative values are treated as zero.

    Returns:
        A string such as '1 hr 2 mins', '1 min 5 secs' or '0 sec', with no
        leading or trailing whitespace.
    """
    total = max(int(delta), 0)
    hours, remainder = divmod(total, 3600)
    minutes, seconds = divmod(remainder, 60)

    parts = []
    if hours:
        parts.append('%d %s' % (hours, 'hrs' if hours > 1 else 'hr'))
    if minutes:
        parts.append('%d %s' % (minutes, 'mins' if minutes > 1 else 'min'))
    if not hours:
        parts.append('%d %s' % (seconds, 'secs' if seconds > 1 else 'sec'))
    return ' '.join(parts)
+jinja_environment.filters['delta_time'] = delta_time |
+ |
def time_since(timestamp):
    """Return delta_time() text for the time elapsed since `timestamp`.

    `timestamp` is subtracted from time.time(), so it is expected to be in
    the same unit (seconds since the epoch).
    """
    return delta_time(time.time() - timestamp)
+jinja_environment.filters['time_since'] = time_since |
+ |
def nl2br(value):
    """Insert an HTML <br> before every newline in `value`."""
    return '<br>\n'.join(value.split('\n'))
+jinja_environment.filters['nl2br'] = nl2br |
+ |
def cl_comment(value):
    """Linkify a change description and convert its newlines to <br>.

    http(s) URLs become anchors, BUG=#### gets a crbug.com link, and each
    newline is replaced by '<br>'.

    Args:
        value: The change description text.

    Returns:
        The text with the HTML markup described above inserted.
    """
    # Stop the URL at whitespace, quotes or angle brackets so the rest of
    # the line is not swallowed into the link and the href stays well formed.
    value = re.sub(r'(https?://[^\s<>"]+)', r'<a href="\1">\1</a>', value)
    value = re.sub(
        r'BUG=(\d+)', r'BUG=<a href="http://crbug.com/\1">\1</a>', value)
    return value.replace('\n', '<br>')
+jinja_environment.filters['cl_comment'] = cl_comment |
+ |
+######## |
+# Models |
+######## |
+ |
class BuildLogModel(db.Model):
    """Used for caching finished build logs."""
    # URL the log was fetched from; entries are looked up by this value.
    url = db.StringProperty()
    # Stored log payload.
    data = db.BlobProperty()
+ |
class BuildLogResultModel(db.Model):
    """Used for caching finished and parsed build logs."""
    # URL of the log this parsed result belongs to.
    url = db.StringProperty()
    # Application version that produced the parsed result.
    version = db.StringProperty()
    # Stored parsed-result payload.
    data = db.BlobProperty()
+ |
+ |
+############ |
+# Decorators |
+############ |
def render(template_filename):
    """Decorator factory: render a handler's return value with a template.

    The decorated handler method is expected to return a dict, which is used
    as the context for the Jinja template named `template_filename`.  The
    rendered page is written to the handler's response.

    If the handler returns None (for instance because it already redirected
    or reported an error) nothing is rendered.

    Args:
        template_filename: Name of the template to load from
            jinja_environment.

    Returns:
        A decorator for request-handler methods.
    """
    def _render(fn):
        @functools.wraps(fn)
        def wrapper(self, *args, **kwargs):
            results = fn(self, *args, **kwargs)
            if results is None:
                # template.render(None) would raise; there is no context.
                return
            template = jinja_environment.get_template(template_filename)
            self.response.out.write(template.render(results))
        return wrapper
    return _render
+ |
def render_json(fn):
    """Decorator: write a handler's return value to the response as JSON.

    The decorated handler method is expected to return a JSON-serializable
    object (typically a dict).  The response Content-Type is set to
    application/json.
    """
    @functools.wraps(fn)
    def wrapper(self, *args, **kwargs):
        results = fn(self, *args, **kwargs)
        self.response.headers['Content-Type'] = 'application/json'
        self.response.out.write(json.dumps(results))
    return wrapper
+ |
def return_json_if_flag_is_set_else_render(template_filename):
    """Decorator factory: respond with JSON or with a rendered template.

    If the request carries a non-empty 'json' parameter, the handler's return
    value is written as JSON with an application/json Content-Type.
    Otherwise it is used as the context for the template named
    `template_filename`; a None return value renders nothing.

    Args:
        template_filename: Name of the template to load from
            jinja_environment.

    Returns:
        A decorator for request-handler methods.
    """
    def _render(fn):
        @functools.wraps(fn)
        def wrapper(self, *args, **kwargs):
            results = fn(self, *args, **kwargs)
            if self.request.get('json'):
                self.response.headers['Content-Type'] = 'application/json'
                self.response.out.write(json.dumps(results))
                return
            if results is None:
                # template.render(None) would raise; there is no context.
                return
            template = jinja_environment.get_template(template_filename)
            self.response.out.write(template.render(results))
        return wrapper
    return _render
+ |
def login_required(fn):
    """Decorator: redirect to a login page unless a user is signed in.

    When no user is signed in, the handler is not called and the response is
    a redirect to a login URL that returns to the current request URI.
    """
    @functools.wraps(fn)
    def wrapper(self, *args, **kwargs):
        if not users.get_current_user():
            self.redirect(users.create_login_url(self.request.uri))
            return
        return fn(self, *args, **kwargs)
    return wrapper
+ |
def google_login_required(fn):
    """Decorator: return 403 unless the user is from the @google.com domain.

    Users who are not signed in are redirected to a login page.  Signed-in
    users whose email domain is not exactly 'google.com' get a 403 response.
    """
    @functools.wraps(fn)
    def wrapper(self, *args, **kwargs):
        user = users.get_current_user()
        if not user:
            self.redirect(users.create_login_url(self.request.uri))
            return
        # The domain is whatever follows the last '@' in the address.
        _, separator, domain = user.email().rpartition('@')
        if separator and domain == 'google.com':
            return fn(self, *args, **kwargs)
        self.error(403)  # Unrecognized email or unauthorized domain.
        self.response.out.write('unauthorized email %s' % user.email())
    return wrapper
+ |
def admin_required(fn):
    """Decorator: return 403 unless an admin is logged in.

    Users who are not signed in are redirected to a login page.  Signed-in
    users who are not application admins get a 403 response.
    """
    @functools.wraps(fn)
    def wrapper(self, *args, **kwargs):
        if not users.get_current_user():
            self.redirect(users.create_login_url(self.request.uri))
            return
        if not users.is_current_user_admin():
            self.error(403)
            return
        return fn(self, *args, **kwargs)
    return wrapper
+ |
def expect_request(*request_args):
    """Decorator factory: pass request parameters as keyword arguments.

    Each spec names a request parameter to read with self.request.get() and
    hand to the handler as a keyword argument.  Supported forms:
        name               (pass request parameter "name" as name=...)
        time as timestamp  (pass request parameter "time" as timestamp=...)
    A leading "(type)" prefix is accepted by the spec grammar but no
    conversion is applied; values are passed exactly as the request returns
    them.

    Specs are validated once, when the decorator is created.

    Args:
        *request_args: Spec strings in the forms described above.

    Returns:
        A decorator for request-handler methods.

    Raises:
        Exception: If a spec does not match the expected format.
    """
    spec_re = re.compile(r'^(\((\w+)\))?\s*(\w+)( as (\w+))?$')
    bindings = []
    for arg in request_args:
        arg_match = spec_re.match(arg)
        if not arg_match:
            raise Exception('Incorrect format %s' % arg)
        name = arg_match.group(3)
        # Without an "as" clause the keyword is the parameter name itself.
        bindings.append((name, arg_match.group(5) or name))

    def _decorator(fn):
        @functools.wraps(fn)
        def wrapper(self, *args, **kwargs):
            for name, target_name in bindings:
                kwargs[target_name] = self.request.get(name)
            return fn(self, *args, **kwargs)
        return wrapper
    return _decorator
agable
2013/04/15 19:33:30
All these wrappers are really nice and general. On
Ryan Tseng
2013/04/17 22:53:48
Or I can do that now :)
|
+ |
# emit: turn one chunk of log text into display rows.
#   source -- label for the chunk; 'header' chunks get the 'text-info' CSS
#             class on every row.
#   out    -- the chunk's text; it is split on '\n' and empty lines are
#             dropped.
# Returns (source, rows), where each row is a tuple of
# (abbreviated_line_or_None, line, css_class_list).
# NOTE(review): the bracketed markers below ('[ RUN ]', '[ OK ]', ...) appear
# with single spaces here; the page this was copied from may have collapsed
# padding whitespace -- confirm against the real file before relying on them.
+def emit(source, out): |
+ # TODO(hinoka): This currently employs a "lookback" strategy |
+ # (Find [PASS/FAIL], then goes back and marks all of the lines.) |
+ # This should be switched to a "scan twice" strategy. 1st pass creates a |
+ # Test Name -> PASS/FAIL/INCOMPLETE dictionary, and 2nd pass marks the lines. |
+ title = source |
agable
2013/04/15 19:33:30
Remove this, title is never used.
Ryan Tseng
2013/04/17 22:53:48
Done.
|
+ attr = [] |
+ if source == 'header': |
+ attr.append('text-info') |
+ lines = [] |
+ current_test = None |
+ current_test_line = 0 |
+ for line in out.split('\n'): |
+ if line: |
+ test_match = re.search(r'\[ RUN \]\s*([^() ]*)\s*', line) |
agable
2013/04/15 19:33:30
Here you're searching for [ RUN ], while earli
Ryan Tseng
2013/04/17 22:53:48
This set of regex is a bit special in that its not
|
+ line_attr = attr[:] |
+ if test_match: |
+ # This line is a "We're running a test" line. |
# Remember the test name and the row index where its output starts.
+ current_test = test_match.group(1).strip() |
+ current_test_line = len(lines) |
+ elif '[ OK ]' in line or '[ PASSED ]' in line: |
+ line_attr.append('text-success') |
+ test_match = re.search(r'\[ OK \]\s*([^(), ]*)\s*', line) |
+ if test_match: |
+ finished_test = test_match.group(1).strip() |
# Lookback: tag every row recorded since the RUN line -- success when the
# finished test name equals the running test, error otherwise.
+ for line_item in lines[current_test_line:]: |
+ if finished_test == current_test: |
+ line_item[2].append('text-success') |
+ else: |
+ line_item[2].append('text-error') |
+ current_test = None |
+ elif '[ FAILED ]' in line: |
+ line_attr.append('text-error') |
+ test_match = re.search(r'\[ FAILED \]\s*([^(), ]*)\s*', line) |
+ if test_match: |
+ finished_test = test_match.group(1).strip() |
# Lookback: tag rows since the RUN line as errors when the names agree.
+ for line_item in lines[current_test_line:]: |
+ if finished_test == current_test: |
+ line_item[2].append('text-error') |
+ current_test = None |
# Any other 10-character bracketed marker ends the current test.
+ elif re.search(r'\[.{10}\]', line): |
+ current_test = None |
+ elif re.search(r'\[\s*\d+/\d+\]\s*\d+\.\d+s\s+[\w/]+\.' |
agable
2013/04/15 19:33:30
Document your regexes :)
Ryan Tseng
2013/04/17 22:53:48
Done.
|
+ r'[\w/]+\s+\([\d.s]+\)', line): |
+ current_test = None |
+ line_attr.append('text-success') |
+ elif 'aborting test' in line: |
+ current_test = None |
# Any other output seen while a test is running is tagged as a warning.
+ elif current_test: |
+ line_attr.append('text-warning') |
+ |
# Lines longer than 160 characters also get a 160-character abbreviated copy;
# both the abbreviated and the full text go through REPLACEMENTS.
+ if len(line) > 160: |
agable
2013/04/15 19:33:30
Why 160?
Ryan Tseng
2013/04/17 22:53:48
That was arbitrary. I think I'll remove this and
|
+ line_abbr = line[:160] |
+ line_abbr = line_abbr.replace(' ', ' ') |
+ line = line.replace(' ', ' ') |
+ if 'apply_issue' in line: |
+ logging.warning(line) |
+ for rep_from, rep_to in REPLACEMENTS: |
+ line_abbr = re.sub(rep_from, rep_to, line_abbr) |
+ line = re.sub(rep_from, rep_to, line) |
+ lines.append((line_abbr, line, line_attr)) |
+ else: |
+ line = line.replace(' ', ' ') |
+ for rep_from, rep_to in REPLACEMENTS: |
+ line = re.sub(rep_from, rep_to, line) |
+ lines.append((None, line, line_attr)) |
agable
2013/04/15 19:33:30
Can pull this duplicated code (line.replace; for f
Ryan Tseng
2013/04/17 22:53:48
Removed line_abbr anyways.
|
+ return (title, lines) |
agable
2013/04/15 19:33:30
Remove 'return title', it is identical to the inpu
Ryan Tseng
2013/04/17 22:53:48
Done.
|
+ |
+ |
class BuildStep(webapp2.RequestHandler):
    """Parses a build step page."""

    @render('step.html')
    @expect_request('url')
    def get(self, url):
        """Fetch a build's JSON description and return it as template context.

        Args:
            url: URL of a build page whose path looks like
                /[p/]<master>/builders/<builder>/builds/<number>.

        Returns:
            The decoded build JSON, extended with 'breadcrumbs' and, when the
            build properties include one, a 'rietveld' entry.  A missing or
            unrecognized url redirects to /buildbot/ and stops processing.
        """
        if not url:
            # abort=True raises, so nothing below runs after the redirect.
            return self.redirect('/buildbot/', abort=True)

        # Fetch the page.
        sch, netloc, path, _, _, _ = urlparse.urlparse(url)
        url_m = re.match(r'^/((p/)?)(.*)/builders/(.*)/builds/(\d+)$', path)
        if not url_m:
            return self.redirect('/buildbot/', abort=True)
        prefix, _, master, builder, build_number = url_m.groups()
        json_url = '%s://%s/%s%s/json/builders/%s/builds/%s' % (
            sch, netloc, prefix, master, builder, build_number)
        # NOTE(review): the scheme and host come straight from the request
        # parameter, so this fetches from a caller-chosen server -- consider
        # restricting the allowed hosts.
        s = urlfetch.fetch(json_url.replace(' ', '%20'),
                           method=urlfetch.GET, deadline=60).content
        logging.info(s)

        result = json.loads(s)

        # Add on some extraneous info.
        build_properties = dict((name, value) for name, value, _
                                in result['properties'])

        if 'rietveld' in build_properties:
            result['rietveld'] = build_properties['rietveld']
        result['breadcrumbs'] = [
            ('Master %s' % master, '#'),
            ('Builder %s' % builder, '#'),
            ('Build Number %s' % build_number, '#'),
            ('Slave %s' % result['slave'], '#')
        ]
        return result
+ |
+ |
class MainPage(webapp2.RequestHandler):
    """Parses a buildlog page."""

    # Compressed payloads of this many bytes or more are not stored.
    _MAX_CACHED_BYTES = 10 ** 6

    # Path shape of a whole-build page; those are handled by the step view.
    _BUILD_PATH_RE = r'^/((p/)?)(.*)/builders/(.*)/builds/(\d+)$'

    # Shape of a step-log URL: master, builder, build number and step name.
    _STEP_LOG_URL_RE = (
        r'/[p]/([\w.]+)/builders/(\w+)/builds/(\w+)/steps/(\w+)/logs/.*')

    @staticmethod
    def _parse_output(log_html):
        """Merge consecutive same-class log spans and emit() each merged run."""
        spans = re.findall(r'<span class="(header|stdout)">(.*?)</span>',
                           log_html, re.S)
        result_output = []
        current_source = None
        current_string = ''
        for source, output in spans:
            if source == current_source:
                current_string += output
                continue
            # We hit a new source: emit whatever we had and start anew.
            if current_string:
                result_output.append(emit(current_source, current_string))
            current_string = output
            current_source = source
        if current_string:
            result_output.append(emit(current_source, current_string))
        return result_output

    @render('main.html')
    @expect_request('url')
    def get(self, url):
        """Fetch, parse and describe a step log for the main template.

        Args:
            url: URL of a step log.  Empty renders the page with no context;
                a whole-build URL is redirected to /buildbot/step.

        Returns:
            The template context dict (parsed output, breadcrumbs, status,
            timings and sizes), or {} when there is nothing to parse.
        """
        if not url:
            return {}

        # Redirect the page if we detect a different type of URL.
        path = urlparse.urlparse(url).path
        logging.info(path)
        if re.match(self._BUILD_PATH_RE, path):
            # Quote the URL so its own '&', '#' and '?' survive as one value.
            self.redirect(
                '/buildbot/step?url=%s' % urllib.quote(url, safe=''))
            return {}

        # Validate before fetching so an unrecognized URL is rejected early
        # instead of failing after the download.
        url_match = re.search(self._STEP_LOG_URL_RE, url)
        if not url_match:
            self.abort(400)
        master_name, builder_name, build_number, step = url_match.groups()

        buildlog = BuildLogModel.all().filter('url =', url).get()
        log_fetch_start = time.time()
        if buildlog:
            s = zlib.decompress(buildlog.data)
        else:
            s = urlfetch.fetch(url, method=urlfetch.GET, deadline=60).content
        log_fetch_time = time.time() - log_fetch_start

        ret_code_m = re.search(r'program finished with exit code (-?\d+)', s)
        if ret_code_m:
            ret_code = int(ret_code_m.group(1))
            status = 'OK' if ret_code == 0 else 'ERROR'
        else:
            ret_code = None
            status = 'RUNNING'
        finished = ret_code is not None

        cached_result = BuildLogResultModel.all().filter(
            'url =', url).filter('version =', VERSION_ID).get()
        parse_time_start = time.time()
        if cached_result:
            result_output = json.loads(zlib.decompress(cached_result.data))
        else:
            result_output = self._parse_output(s)
            # Only a finished log is stable; caching the parse of a running
            # log would keep serving its incomplete output.
            if finished:
                compressed_result = zlib.compress(json.dumps(result_output))
                if len(compressed_result) < self._MAX_CACHED_BYTES:
                    cached_result = BuildLogResultModel(
                        url=url, version=VERSION_ID, data=compressed_result)
                    cached_result.put()

        if finished and not buildlog:
            # Cache this build log if not already.
            compressed_data = zlib.compress(s)
            if len(compressed_data) < self._MAX_CACHED_BYTES:
                buildlog = BuildLogModel(url=url, data=compressed_data)
                buildlog.put()
        parse_time = time.time() - parse_time_start

        return {
            'output': result_output,
            'url': url,
            'name': step,
            'breadcrumbs': [
                ('Master %s' % master_name,
                 'http://build.chromium.org/p/%s/waterfall' % master_name),
                ('Builder %s' % builder_name,
                 'http://build.chromium.org/p/%s/builders/%s' %
                 (master_name, builder_name)),
                ('Build Number %s ' % build_number,
                 'http://build.chromium.org/p/%s/builders/%s/builds/%s' %
                 (master_name, builder_name, build_number)),
                ('Step %s' % step, url)
            ],
            'status': status,
            'ret_code': ret_code,
            'log_fetch_time': log_fetch_time,
            'parse_time': parse_time,
            'compressed_size': len(buildlog.data) if buildlog else -1,
            'compressed_report': (
                len(cached_result.data) if cached_result else -1),
            'debug': self.request.get('debug'),
            'size': len(s)
        }
agable
2013/04/15 19:33:30
Could cache the compressed version of this whole j
Ryan Tseng
2013/04/17 22:53:48
done :) (That's what line 388/412 is)
Well, it cac
|
+ |
+ |
def webapp_add_wsgi_middleware(app):
    """Return `app` wrapped in the appstats recording WSGI middleware."""
    from google.appengine.ext.appstats import recording
    return recording.appstats_wsgi_middleware(app)
+ |
+ |
# URL routes served by this module.
_ROUTES = [
    ('/buildbot/', MainPage),
    ('/buildbot/step/?', BuildStep),
]

# WSGI entry point: the webapp2 application wrapped by the middleware hook.
app = webapp_add_wsgi_middleware(
    webapp2.WSGIApplication(_ROUTES, debug=True))