Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """Utility for outputting a HTML diff of two multi-line strings. | 5 """Utility for outputting a HTML diff of two multi-line strings. |
| 6 | 6 |
| 7 The main purpose of this utility is to show the difference between | 7 The main purpose of this utility is to show the difference between |
| 8 text baselines (-expected.txt files) and actual text results. | 8 text baselines (-expected.txt files) and actual text results. |
| 9 | 9 |
| 10 Note, in the standard library module difflib, there is also a HtmlDiff class, | 10 Note, in the standard library module difflib, there is also a HtmlDiff class, |
| 11 although it outputs a larger and more complex HTML table than we need. | 11 although it outputs a larger and more complex HTML table than we need. |
| 12 """ | 12 """ |
| 13 | 13 |
| 14 import cgi | 14 import cgi |
| 15 import difflib | 15 import difflib |
| 16 | 16 |
| 17 _TEMPLATE = """<html> | 17 _TEMPLATE = """<html> |
| 18 <head> | 18 <head> |
| 19 <style> | 19 <style> |
| 20 pre { white-space: pre-wrap; } | 20 table { white-space: pre-wrap; font-family: monospace; border-collapse: collapse; } |
| 21 th { color: #444; background: #eed; text-align: right; vertical-align: baseline; padding: 1px 4px 1px 4px; } | |
| 21 .del { background: #faa; } | 22 .del { background: #faa; } |
| 22 .add { background: #afa; } | 23 .add { background: #afa; } |
| 23 </style> | 24 </style> |
| 24 </head> | 25 </head> |
| 25 <body> | 26 <body><table>%s</table></body> |
| 26 <pre>%s</pre> | |
| 27 </body> | |
| 28 </html> | 27 </html> |
| 29 """ | 28 """ |
| 30 | 29 |
| 31 | 30 |
| 32 def html_diff(a_text, b_text): | 31 def html_diff(a_text, b_text): |
| 33 """Returns a diff between two strings as HTML.""" | 32 """Returns a diff between two strings as HTML.""" |
| 34 # Diffs can be between multiple text files of different encodings | 33 # Diffs can be between multiple text files of different encodings |
| 35 # so we always want to deal with them as byte arrays, not unicode strings. | 34 # so we always want to deal with them as byte arrays, not unicode strings. |
| 36 assert isinstance(a_text, str) | 35 assert isinstance(a_text, str) |
| 37 assert isinstance(b_text, str) | 36 assert isinstance(b_text, str) |
| 38 a_lines = a_text.splitlines(True) | 37 a_lines = a_text.splitlines(True) |
| 39 b_lines = b_text.splitlines(True) | 38 b_lines = b_text.splitlines(True) |
| 40 return _TEMPLATE % html_diff_body(a_lines, b_lines) | 39 return _TEMPLATE % HtmlDiffGenerator().generate_tbody(a_lines, b_lines) |
| 41 | 40 |
| 42 | 41 |
| 43 def html_diff_body(a_lines, b_lines): | 42 class HtmlDiffGenerator(object): |
| 44 matcher = difflib.SequenceMatcher(None, a_lines, b_lines) | |
| 45 output = [] | |
| 46 for tag, a_start, a_end, b_start, b_end in matcher.get_opcodes(): | |
| 47 a_chunk = ''.join(a_lines[a_start:a_end]) | |
| 48 b_chunk = ''.join(b_lines[b_start:b_end]) | |
| 49 output.append(_format_chunk(tag, a_chunk, b_chunk)) | |
| 50 return ''.join(output) | |
| 51 | 43 |
| 44 def __init__(self): | |
| 45 self.a_line_no = None | |
| 46 self.b_line_no = None | |
| 47 self.a_lines_len = None | |
| 52 | 48 |
| 53 def _format_chunk(tag, a_chunk, b_chunk): | 49 def generate_tbody(self, a_lines, b_lines): |
| 54 if tag == 'delete': | 50 self.a_line_no = 0 |
| 55 return _format_delete(a_chunk) | 51 self.b_line_no = 0 |
| 56 if tag == 'insert': | 52 self.a_lines_len = len(a_lines) |
| 57 return _format_insert(b_chunk) | 53 matcher = difflib.SequenceMatcher(None, a_lines, b_lines) |
| 58 if tag == 'replace': | 54 output = [] |
| 59 return _format_delete(a_chunk) + _format_insert(b_chunk) | 55 for tag, a_start, a_end, b_start, b_end in matcher.get_opcodes(): |
| 60 assert tag == 'equal' | 56 output.append(self._format_chunk(tag, a_lines[a_start:a_end], b_lines[b_start:b_end])) |
| 61 return cgi.escape(a_chunk) | 57 return ''.join(output) |
| 62 | 58 |
| 59 def _format_chunk(self, tag, a_chunk, b_chunk): | |
| 60 if tag == 'delete': | |
| 61 return self._format_delete(a_chunk) | |
| 62 if tag == 'insert': | |
| 63 return self._format_insert(b_chunk) | |
| 64 if tag == 'replace': | |
| 65 return self._format_delete(a_chunk) + self._format_insert(b_chunk) | |
| 66 assert tag == 'equal' | |
| 67 output = '' | |
| 68 if len(a_chunk) <= 7: | |
| 69 for line in a_chunk: | |
| 70 output += self._format_equal(line) | |
| 71 else: | |
| 72 # Do not show context lines at the beginning of the file. | |
| 73 if self.a_line_no == 0: | |
| 74 self.a_line_no += 3 | |
| 75 self.b_line_no += 3 | |
| 76 else: | |
| 77 for line in a_chunk[0:3]: | |
| 78 output += self._format_equal(line) | |
| 79 self.a_line_no += len(a_chunk) - 6 | |
| 80 self.b_line_no += len(b_chunk) - 6 | |
| 81 output += '<tr><td colspan=3>\n\n</tr>' | |
| 82 # Do not show context lines at the end of the file. | |
| 83 if self.a_line_no + 3 != self.a_lines_len: | |
| 84 for line in a_chunk[len(a_chunk) - 3:len(a_chunk)]: | |
| 85 output += self._format_equal(line) | |
| 86 return output | |
|
qyearsley
2017/02/15 00:02:22
Possible refactoring: Could lines 66 to 86 here be
tkent
2017/02/15 05:08:09
Done.
| |
| 63 | 87 |
| 64 def _format_insert(chunk): | 88 def _format_equal(self, line): |
| 65 return '<span class="add">%s</span>' % cgi.escape(chunk) | 89 self.a_line_no += 1 |
| 90 self.b_line_no += 1 | |
| 91 return '<tr><th>%d<th>%d<td>%s</tr>' % (self.a_line_no, self.b_line_no, cgi.escape(line)) | |
| 66 | 92 |
| 93 def _format_insert(self, chunk): | |
| 94 output = '' | |
| 95 for line in chunk: | |
| 96 self.b_line_no += 1 | |
| 97 output += '<tr><th><th>%d<td class="add">%s</tr>' % (self.b_line_no, cgi.escape(line)) | |
| 98 return output | |
| 67 | 99 |
| 68 def _format_delete(chunk): | 100 def _format_delete(self, chunk): |
| 69 return '<span class="del">%s</span>' % cgi.escape(chunk) | 101 output = '' |
| 102 for line in chunk: | |
| 103 self.a_line_no += 1 | |
| 104 output += '<tr><th>%d<th><td class="del">%s</tr>' % (self.a_line_no, cgi.escape(line)) | |
| 105 return output | |
| OLD | NEW |