Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 from __future__ import with_statement | 5 from __future__ import with_statement |
| 6 | 6 |
| 7 import datetime | 7 import datetime |
| 8 import json | 8 import json |
| 9 import logging | 9 import logging |
| 10 import os | 10 import os |
| (...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 268 'comment': None, | 268 'comment': None, |
| 269 'details': None, | 269 'details': None, |
| 270 } | 270 } |
| 271 prev_rev_db = get_or_create_row('latest_rev', rev_number) | 271 prev_rev_db = get_or_create_row('latest_rev', rev_number) |
| 272 prev_rev_db.fetch_timestamp = datetime.datetime.now() | 272 prev_rev_db.fetch_timestamp = datetime.datetime.now() |
| 273 prev_rev_db.rev_number = rev_number | 273 prev_rev_db.rev_number = rev_number |
| 274 prev_rev_db.put() | 274 prev_rev_db.put() |
| 275 put_data_into_cache('latest_rev', latest_rev_row) | 275 put_data_into_cache('latest_rev', latest_rev_row) |
| 276 | 276 |
| 277 | 277 |
| 278 def utf8_convert(beautiful_soup_tag): | |
| 279 # cmp also investigated: | |
| 280 # beautiful_soup_tag.__str__(encoding='utf-8').decode('utf-8') | |
| 281 # He found that the BeautifulSoup() __str__ method when used with a 'utf-8' | |
| 282 # encoding returned effectively the same thing as str(), a Python built-in. | |
| 283 # After a handful of tests, he switched to using str() to avoid the add'l | |
| 284 # complexity of another BeautifulSoup method. | |
| 285 return str(beautiful_soup_tag).decode('utf-8') | |
| 286 | |
| 287 | |
| 278 ########## | 288 ########## |
| 279 # ConsoleData class definition and related functions. | 289 # ConsoleData class definition and related functions. |
| 280 ########## | 290 ########## |
| 281 class ConsoleData(object): | 291 class ConsoleData(object): |
| 282 def __init__(self): | 292 def __init__(self): |
| 283 self.row_orderedkeys = [] | 293 self.row_orderedkeys = [] |
| 284 self.row_data = {} | 294 self.row_data = {} |
| 285 | 295 |
| 286 # Retain order of observed masters. | 296 # Retain order of observed masters. |
| 287 self.masters = [] | 297 self.masters = [] |
| 288 | 298 |
| 289 # Map(k,v): k=Master, v=List of categories | 299 # Map(k,v): k=Master, v=List of categories |
| 290 self.category_order = {} | 300 self.category_order = {} |
| 291 # Map(k,v): k=Master, v=Dict of category data | 301 # Map(k,v): k=Master, v=Dict of category data |
| 292 self.category_data = {} | 302 self.category_data = {} |
| 293 | 303 |
| 294 self.category_count = 0 | 304 self.category_count = 0 |
| 295 self.master = '' | 305 self.master = '' |
| 296 self.lastRevisionSeen = None | 306 self.lastRevisionSeen = None |
| 297 self.lastMasterSeen = None | 307 self.lastMasterSeen = None |
| 298 | 308 |
| 299 @staticmethod | 309 @staticmethod |
| 300 def ContentsToHtml(contents): | 310 def ContentsToHtml(contents): |
| 301 return ''.join(unicode(content).encode('ascii', 'replace') | 311 return ''.join(utf8_convert(content) for content in contents) |
| 302 for content in contents) | |
|
cmp
2015/02/12 18:37:16
This caused the UTF-8 characters |content| was holding to be downsampled to '?'.
| |
| 303 | 312 |
| 304 @property | 313 @property |
| 305 def last_row(self): | 314 def last_row(self): |
| 306 return self.row_data[self.lastRevisionSeen] | 315 return self.row_data[self.lastRevisionSeen] |
| 307 | 316 |
| 308 def SawMaster(self, master): | 317 def SawMaster(self, master): |
| 309 self.lastMasterSeen = master | 318 self.lastMasterSeen = master |
| 310 assert(self.lastMasterSeen not in self.category_order) | 319 assert(self.lastMasterSeen not in self.category_order) |
| 311 self.masters.append(self.lastMasterSeen) | 320 self.masters.append(self.lastMasterSeen) |
| 312 self.category_order.setdefault(self.lastMasterSeen, []) | 321 self.category_order.setdefault(self.lastMasterSeen, []) |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 394 | 403 |
| 395 | 404 |
| 396 ########## | 405 ########## |
| 397 # Heavy-lifting functions that do most of the console processing. | 406 # Heavy-lifting functions that do most of the console processing. |
| 398 # AKA postfetch and postsave functions/handlers. | 407 # AKA postfetch and postsave functions/handlers. |
| 399 ########## | 408 ########## |
| 400 def console_merger(localpath, remoteurl, page_data, | 409 def console_merger(localpath, remoteurl, page_data, |
| 401 masters_to_merge=None, num_rows_to_merge=None): | 410 masters_to_merge=None, num_rows_to_merge=None): |
| 402 masters_to_merge = masters_to_merge or DEFAULT_MASTERS_TO_MERGE | 411 masters_to_merge = masters_to_merge or DEFAULT_MASTERS_TO_MERGE |
| 403 num_rows_to_merge = num_rows_to_merge or 25 | 412 num_rows_to_merge = num_rows_to_merge or 25 |
| 404 mergedconsole = ConsoleData() | 413 console_data = ConsoleData() |
| 405 surroundings = get_and_cache_pagedata('surroundings') | 414 surroundings = get_and_cache_pagedata('surroundings') |
| 406 merged_page = BeautifulSoup(surroundings['content']) | 415 merged_page = BeautifulSoup(surroundings['content']) |
| 407 merged_tag = merged_page.find('table', 'ConsoleData') | 416 merged_tag = merged_page.find('table', 'ConsoleData') |
| 408 if merged_tag is None: | 417 if merged_tag is None: |
| 409 msg = 'console_merger("%s", "%s", "%s"): merged_tag cannot be None.' % ( | 418 msg = 'console_merger("%s", "%s", "%s"): merged_tag cannot be None.' % ( |
| 410 localpath, remoteurl, page_data) | 419 localpath, remoteurl, page_data) |
| 411 logging.error(msg) | 420 logging.error(msg) |
| 412 raise Exception(msg) | 421 raise Exception(msg) |
| 413 latest_rev = int(get_and_cache_rowdata('latest_rev')['rev_number']) | 422 latest_rev = int(get_and_cache_rowdata('latest_rev')['rev_number']) |
| 414 if not latest_rev: | 423 if not latest_rev: |
| 415 logging.error('console_merger(\'%s\', \'%s\', \'%s\'): cannot get latest ' | 424 logging.error('console_merger(\'%s\', \'%s\', \'%s\'): cannot get latest ' |
| 416 'revision number.' % ( | 425 'revision number.' % ( |
| 417 localpath, remoteurl, page_data)) | 426 localpath, remoteurl, page_data)) |
| 418 return | 427 return |
| 419 fetch_timestamp = datetime.datetime.now() | 428 fetch_timestamp = datetime.datetime.now() |
| 420 for master in masters_to_merge: | 429 for master in masters_to_merge: |
| 421 # Fetch the summary one-box-per-builder for the master. | 430 # Fetch the summary one-box-per-builder for the master. |
| 422 # If we don't get it, something is wrong, skip the master entirely. | 431 # If we don't get it, something is wrong, skip the master entirely. |
| 423 master_summary = get_and_cache_pagedata('%s/console/summary' % master) | 432 master_summary = get_and_cache_pagedata('%s/console/summary' % master) |
| 424 if not master_summary['content']: | 433 if not master_summary['content']: |
| 425 continue | 434 continue |
| 426 mergedconsole.SawMaster(master) | 435 console_data.SawMaster(master) |
| 427 # Get the categories for this builder. If the builder doesn't have any | 436 # Get the categories for this builder. If the builder doesn't have any |
| 428 # categories, just use the default empty-string category. | 437 # categories, just use the default empty-string category. |
| 429 category_list = [] | 438 category_list = [] |
| 430 master_categories = get_and_cache_pagedata('%s/console/categories' % master) | 439 master_categories = get_and_cache_pagedata('%s/console/categories' % master) |
| 431 if not master_categories['content']: | 440 if not master_categories['content']: |
| 432 category_list.append('') | 441 category_list.append('') |
| 433 else: | 442 else: |
| 434 category_row = BeautifulSoup(master_categories['content']) | 443 category_row = BeautifulSoup(master_categories['content']) |
| 435 category_list = [c.text for c in category_row.findAll('td', 'DevStatus')] | 444 category_list = [c.text for c in category_row.findAll('td', 'DevStatus')] |
| 436 # Get the corresponding summary box(es). | 445 # Get the corresponding summary box(es). |
| 437 summary_row = BeautifulSoup(master_summary['content']) | 446 summary_row = BeautifulSoup(master_summary['content']) |
| 438 summary_list = summary_row.findAll('table') | 447 summary_list = summary_row.findAll('table') |
| 439 for category, summary in zip(category_list, summary_list): | 448 for category, summary in zip(category_list, summary_list): |
| 440 mergedconsole.AddCategory(category, summary) | 449 console_data.AddCategory(category, summary) |
| 441 | 450 |
| 442 # Fetch all of the rows that we need. | 451 # Fetch all of the rows that we need. |
| 443 rows_fetched = 0 | 452 rows_fetched = 0 |
| 444 revs_skipped = 0 | 453 revs_skipped = 0 |
| 445 current_rev = latest_rev | 454 current_rev = latest_rev |
| 446 while rows_fetched < num_rows_to_merge and current_rev >= 0: | 455 while rows_fetched < num_rows_to_merge and current_rev >= 0: |
| 447 # Don't get stuck looping backwards forever into data we don't have. | 456 # Don't get stuck looping backwards forever into data we don't have. |
| 448 # How hard we try scales with how many rows the person wants. | 457 # How hard we try scales with how many rows the person wants. |
| 449 if revs_skipped > max(num_rows_to_merge, 10): | 458 if revs_skipped > max(num_rows_to_merge, 10): |
| 450 break | 459 break |
| 451 row_data = get_and_cache_rowdata('%s/console/%s' % (master, current_rev)) | 460 row_data = get_and_cache_rowdata('%s/console/%s' % (master, current_rev)) |
| 452 if not row_data: | 461 if not row_data: |
| 453 current_rev -= 1 | 462 current_rev -= 1 |
| 454 revs_skipped += 1 | 463 revs_skipped += 1 |
| 455 continue | 464 continue |
| 456 mergedconsole.AddRow(row_data) | 465 console_data.AddRow(row_data) |
| 457 current_rev -= 1 | 466 current_rev -= 1 |
| 458 revs_skipped = 0 | 467 revs_skipped = 0 |
| 459 rows_fetched += 1 | 468 rows_fetched += 1 |
| 460 | 469 |
| 461 # Convert the merged content into console content. | 470 # Convert the merged content into console content. |
| 462 mergedconsole.Finish() | 471 console_data.Finish() |
| 463 template_environment = Environment() | 472 template_environment = Environment() |
| 464 template_environment.loader = FileSystemLoader('.') | 473 template_environment.loader = FileSystemLoader('.') |
| 465 def notstarted(builder_status): | 474 def notstarted(builder_status): |
| 466 """Convert a BeautifulSoup Tag from builder status to a notstarted line.""" | 475 """Convert a BeautifulSoup Tag from builder status to a notstarted line.""" |
| 467 builder_status = re.sub(r'DevSlaveBox', 'DevStatusBox', str(builder_status)) | 476 builder_status = re.sub(r'DevSlaveBox', 'DevStatusBox', str(builder_status)) |
| 468 builder_status = re.sub(r'class=\'([^\']*)\' target=', | 477 builder_status = re.sub(r'class=\'([^\']*)\' target=', |
| 469 'class=\'DevStatusBox notstarted\' target=', | 478 'class=\'DevStatusBox notstarted\' target=', |
| 470 builder_status) | 479 builder_status) |
| 471 builder_status = re.sub(r'class="([^"]*)" target=', | 480 builder_status = re.sub(r'class="([^"]*)" target=', |
| 472 'class="DevStatusBox notstarted" target=', | 481 'class="DevStatusBox notstarted" target=', |
| 473 builder_status) | 482 builder_status) |
| 474 return builder_status | 483 return builder_status |
| 475 template_environment.filters['notstarted'] = notstarted | 484 template_environment.filters['notstarted'] = notstarted |
| 476 merged_template = template_environment.from_string(console_template) | 485 merged_template = template_environment.from_string(console_template) |
| 477 merged_content = merged_template.render(data=mergedconsole) | 486 merged_console = merged_template.render(data=console_data) |
| 478 # For debugging: | 487 # For debugging: |
| 479 # print merged_content | 488 # logging.info('%r' % merged_console) |
| 480 # import code | 489 # import code |
| 481 # code.interact(local=locals()) | 490 # code.interact(local=locals()) |
| 482 | 491 |
| 483 # Place merged data at |merged_tag|'s location in |merged_page|, and put the | 492 # Place merged console at |merged_tag|'s location in |merged_page|, and put |
| 484 # result in |merged_content|. | 493 # the result in |merged_content|. |
| 485 merged_tag.replaceWith(str(merged_content)) | 494 merged_tag.replaceWith(merged_console) |
|
cmp
2015/02/12 18:37:16
And once ContentsToHtml wasn't downsampling to ?, the str() call here would fail on non-ASCII content, so replaceWith now receives the unicode string directly.
| |
| 486 # .prettify() may damage the HTML but makes output nicer. However, that | 495 merged_content = utf8_convert(merged_page) |
| 487 # cost is a bunch of extra whitespace. We reduce page size by not using | |
| 488 # .prettify(). | |
| 489 merged_content = merged_page.__str__(encoding=None) | |
|
cmp
2015/02/12 18:37:16
This line was causing the DTD doubling in this test.
| |
| 490 merged_content = re.sub( | 496 merged_content = re.sub( |
| 491 r'\'\<a href="\'', '\'<a \' + attributes + \' href="\'', merged_content) | 497 r'\'\<a href="\'', '\'<a \' + attributes + \' href="\'', merged_content) |
| 492 merged_content = re.sub( | 498 merged_content = re.sub( |
| 493 r'\'\<table\>\'', r"'<table ' + attributes + '>'", merged_content) | 499 r'\'\<table\>\'', r"'<table ' + attributes + '>'", merged_content) |
| 494 merged_content = re.sub( | 500 merged_content = re.sub( |
| 495 r'\'\<div\>\'', r"'<div ' + attributes + '>'", merged_content) | 501 r'\'\<div\>\'', r"'<div ' + attributes + '>'", merged_content) |
| 496 merged_content = re.sub( | 502 merged_content = re.sub( |
| 497 r'\'\<td\>\'', r"'<td ' + attributes + '>'", merged_content) | 503 r'\'\<td\>\'', r"'<td ' + attributes + '>'", merged_content) |
| 498 merged_content = re.sub( | 504 merged_content = re.sub( |
| 499 r'\<iframe\>\</iframe\>', | 505 r'\<iframe\>\</iframe\>', |
| (...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 663 # exist more than once. Reverts are examples of commits which can contain | 669 # exist more than once. Reverts are examples of commits which can contain |
| 664 # multiple Cr-Commit-Position instances. In those cases, only the last one | 670 # multiple Cr-Commit-Position instances. In those cases, only the last one |
| 665 # is correct, so split on the break tag and reverse the result to find the | 671 # is correct, so split on the break tag and reverse the result to find the |
| 666 # last occurrence of Cr-Commit-Position. | 672 # last occurrence of Cr-Commit-Position. |
| 667 for line in reversed(commit_msg.split('<br />')): | 673 for line in reversed(commit_msg.split('<br />')): |
| 668 if line.startswith('Cr-Commit-Position: '): | 674 if line.startswith('Cr-Commit-Position: '): |
| 669 return filter(str.isdigit, str(line.split('@')[-1])) | 675 return filter(str.isdigit, str(line.split('@')[-1])) |
| 670 return '0' | 676 return '0' |
| 671 | 677 |
| 672 | 678 |
| 673 def utf8_convert(bstring): | |
| 674 # cmp also investigated: | |
| 675 # bstring.__str__(encoding='utf-8').decode('utf-8') | |
| 676 # He found that the BeautifulSoup() __str__ method when used with a 'utf-8' | |
| 677 # encoding returned effectively the same thing as str(), a Python built-in. | |
| 678 # After a handful of tests, he switched to using str() to avoid the add'l | |
| 679 # complexity of another BeautifulSoup method. | |
| 680 return str(bstring).decode('utf-8') | |
| 681 | |
| 682 | |
| 683 # W0613:600,28:parse_master: Unused argument 'remoteurl' | 679 # W0613:600,28:parse_master: Unused argument 'remoteurl' |
| 684 # pylint: disable=W0613 | 680 # pylint: disable=W0613 |
| 685 def parse_master(localpath, remoteurl, page_data=None): | 681 def parse_master(localpath, remoteurl, page_data=None): |
| 686 """Part of the new pipeline to store individual rows rather than | 682 """Part of the new pipeline to store individual rows rather than |
| 687 whole pages of html. Parses the master data into a set of rows, | 683 whole pages of html. Parses the master data into a set of rows, |
| 688 and writes them out to the datastore in an easily retrievable format. | 684 and writes them out to the datastore in an easily retrievable format. |
| 689 | 685 |
| 690 Doesn't modify page_data dict. | 686 Doesn't modify page_data dict. |
| 691 """ | 687 """ |
| 692 ts = datetime.datetime.now() | 688 ts = datetime.datetime.now() |
| (...skipping 558 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1251 'builds/-1?as_text=1'), | 1247 'builds/-1?as_text=1'), |
| 1252 'localpath': | 1248 'localpath': |
| 1253 'chromium.lkgr/json/builders/Linux%20x64/builds/-1/as_text=1.json', | 1249 'chromium.lkgr/json/builders/Linux%20x64/builds/-1/as_text=1.json', |
| 1254 'maxage': 2*60, # 2 mins | 1250 'maxage': 2*60, # 2 mins |
| 1255 }, | 1251 }, |
| 1256 | 1252 |
| 1257 # # Trigger background process update. | 1253 # # Trigger background process update. |
| 1258 # { | 1254 # { |
| 1259 # 'remoteurl': 'http://chromium-build.appspot.com/backend/update' | 1255 # 'remoteurl': 'http://chromium-build.appspot.com/backend/update' |
| 1260 ] | 1256 ] |
| OLD | NEW |