Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(409)

Side by Side Diff: appengine/chromium_build/app.py

Issue 919733003: Fix another source of utf8 characters being lost. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 from __future__ import with_statement 5 from __future__ import with_statement
6 6
7 import datetime 7 import datetime
8 import json 8 import json
9 import logging 9 import logging
10 import os 10 import os
(...skipping 280 matching lines...) Expand 10 before | Expand all | Expand 10 after
291 # Map(k,v): k=Master, v=Dict of category data 291 # Map(k,v): k=Master, v=Dict of category data
292 self.category_data = {} 292 self.category_data = {}
293 293
294 self.category_count = 0 294 self.category_count = 0
295 self.master = '' 295 self.master = ''
296 self.lastRevisionSeen = None 296 self.lastRevisionSeen = None
297 self.lastMasterSeen = None 297 self.lastMasterSeen = None
298 298
299 @staticmethod 299 @staticmethod
300 def ContentsToHtml(contents): 300 def ContentsToHtml(contents):
301 return ''.join(unicode(content).encode('ascii', 'replace') 301 return ''.join(str(content).decode('utf-8') for content in contents)
302 for content in contents)
303 302
304 @property 303 @property
305 def last_row(self): 304 def last_row(self):
306 return self.row_data[self.lastRevisionSeen] 305 return self.row_data[self.lastRevisionSeen]
307 306
308 def SawMaster(self, master): 307 def SawMaster(self, master):
309 self.lastMasterSeen = master 308 self.lastMasterSeen = master
310 assert(self.lastMasterSeen not in self.category_order) 309 assert(self.lastMasterSeen not in self.category_order)
311 self.masters.append(self.lastMasterSeen) 310 self.masters.append(self.lastMasterSeen)
312 self.category_order.setdefault(self.lastMasterSeen, []) 311 self.category_order.setdefault(self.lastMasterSeen, [])
(...skipping 162 matching lines...) Expand 10 before | Expand all | Expand 10 after
475 template_environment.filters['notstarted'] = notstarted 474 template_environment.filters['notstarted'] = notstarted
476 merged_template = template_environment.from_string(console_template) 475 merged_template = template_environment.from_string(console_template)
477 merged_content = merged_template.render(data=mergedconsole) 476 merged_content = merged_template.render(data=mergedconsole)
478 # For debugging: 477 # For debugging:
479 # print merged_content 478 # print merged_content
480 # import code 479 # import code
481 # code.interact(local=locals()) 480 # code.interact(local=locals())
482 481
483 # Place merged data at |merged_tag|'s location in |merged_page|, and put the 482 # Place merged data at |merged_tag|'s location in |merged_page|, and put the
484 # result in |merged_content|. 483 # result in |merged_content|.
485 merged_tag.replaceWith(str(merged_content)) 484 merged_tag.replaceWith(merged_content)
486 # .prettify() may damage the HTML but makes output nicer. However, the 485 # .prettify() may damage the HTML but makes output nicer. However, the
487 # cost is a bunch of extra whitespace. We reduce page size by not using 486 # cost is a bunch of extra whitespace. We reduce page size by not using
488 # .prettify(). 487 # .prettify().
489 merged_content = merged_page.__str__(encoding=None) 488 merged_content = merged_page.__str__(encoding=None)
490 merged_content = re.sub( 489 merged_content = re.sub(
491 r'\'\<a href="\'', '\'<a \' + attributes + \' href="\'', merged_content) 490 r'\'\<a href="\'', '\'<a \' + attributes + \' href="\'', merged_content)
492 merged_content = re.sub( 491 merged_content = re.sub(
493 r'\'\<table\>\'', r"'<table ' + attributes + '>'", merged_content) 492 r'\'\<table\>\'', r"'<table ' + attributes + '>'", merged_content)
494 merged_content = re.sub( 493 merged_content = re.sub(
495 r'\'\<div\>\'', r"'<div ' + attributes + '>'", merged_content) 494 r'\'\<div\>\'', r"'<div ' + attributes + '>'", merged_content)
(...skipping 167 matching lines...) Expand 10 before | Expand all | Expand 10 after
663 # exist more than once. Reverts are examples of commits which can contain 662 # exist more than once. Reverts are examples of commits which can contain
664 # multiple Cr-Commit-Position instances. In those cases, only the last one 663 # multiple Cr-Commit-Position instances. In those cases, only the last one
665 # is correct, so split on the break tag and reverse the result to find the 664 # is correct, so split on the break tag and reverse the result to find the
666 # last occurrence of Cr-Commit-Position. 665 # last occurrence of Cr-Commit-Position.
667 for line in reversed(commit_msg.split('<br />')): 666 for line in reversed(commit_msg.split('<br />')):
668 if line.startswith('Cr-Commit-Position: '): 667 if line.startswith('Cr-Commit-Position: '):
669 return filter(str.isdigit, str(line.split('@')[-1])) 668 return filter(str.isdigit, str(line.split('@')[-1]))
670 return '0' 669 return '0'
671 670
672 671
673 def utf8_convert(bstring): 672 def utf8_convert(bstag):
jrobbins 2015/02/12 17:55:31 What does 'bstag' stand for?
cmp 2015/02/12 17:59:58 BeautifulSoup tag.
674 # cmp also investigated: 673 # cmp also investigated:
675 # bstring.__str__(encoding='utf-8').decode('utf-8') 674 # bstag.__str__(encoding='utf-8').decode('utf-8')
676 # He found that the BeautifulSoup() __str__ method when used with a 'utf-8' 675 # He found that the BeautifulSoup() __str__ method when used with a 'utf-8'
677 # encoding returned effectively the same thing as str(), a Python built-in. 676 # encoding returned effectively the same thing as str(), a Python built-in.
678 # After a handful of tests, he switched to using str() to avoid the additional 677 # After a handful of tests, he switched to using str() to avoid the additional
679 # complexity of another BeautifulSoup method. 678 # complexity of another BeautifulSoup method.
680 return str(bstring).decode('utf-8') 679 return str(bstag).decode('utf-8')
681 680
682 681
683 # W0613:600,28:parse_master: Unused argument 'remoteurl' 682 # W0613:600,28:parse_master: Unused argument 'remoteurl'
684 # pylint: disable=W0613 683 # pylint: disable=W0613
685 def parse_master(localpath, remoteurl, page_data=None): 684 def parse_master(localpath, remoteurl, page_data=None):
686 """Part of the new pipeline to store individual rows rather than 685 """Part of the new pipeline to store individual rows rather than
687 whole pages of html. Parses the master data into a set of rows, 686 whole pages of html. Parses the master data into a set of rows,
688 and writes them out to the datastore in an easily retrievable format. 687 and writes them out to the datastore in an easily retrievable format.
689 688
690 Doesn't modify page_data dict. 689 Doesn't modify page_data dict.
(...skipping 560 matching lines...) Expand 10 before | Expand all | Expand 10 after
1251 'builds/-1?as_text=1'), 1250 'builds/-1?as_text=1'),
1252 'localpath': 1251 'localpath':
1253 'chromium.lkgr/json/builders/Linux%20x64/builds/-1/as_text=1.json', 1252 'chromium.lkgr/json/builders/Linux%20x64/builds/-1/as_text=1.json',
1254 'maxage': 2*60, # 2 mins 1253 'maxage': 2*60, # 2 mins
1255 }, 1254 },
1256 1255
1257 # # Trigger background process update. 1256 # # Trigger background process update.
1258 # { 1257 # {
1259 # 'remoteurl': 'http://chromium-build.appspot.com/backend/update' 1258 # 'remoteurl': 'http://chromium-build.appspot.com/backend/update'
1260 ] 1259 ]
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698