Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 import logging | |
|
Alexei Svitkine (slow)
2014/01/31 19:56:58
Needs some comments / copyright block at the top.
yao
2014/02/03 14:39:20
Done.
| |
| 2 import textwrap | |
| 3 import xml.dom.minidom | |
| 4 | |
# Maximum width, in characters, of a line of pretty-printed output. Used as the
# text-wrapping width and as the limit when wrapping attributes.
WRAP_COLUMN = 80
| 6 | |
| 7 | |
class Error(Exception):
  """Raised when a node cannot be pretty-printed.

  PrettyPrintNode raises this for unrecognized attributes and for node types
  it does not handle.
  """
| 10 | |
| 11 | |
def LastLineLength(s):
  """Returns the length of the last line in s.

  Args:
    s: A string, possibly containing several newline-separated lines.

  Returns:
    The number of characters after the final newline in s, or the length of
    the whole string if it contains no newline.
  """
  # rpartition yields ('', '', s) when there is no newline, so the tail is
  # always exactly the last line.
  _, _, last_line = s.rpartition('\n')
  return len(last_line)
| 23 | |
| 24 | |
def XmlEscape(s):
  """XML-escapes the given string.

  Replaces the magic characters (&<>") with their escaped equivalents.

  Args:
    s: The string to escape.

  Returns:
    A copy of s with '&', '<', '>' and '"' replaced by '&amp;', '&lt;',
    '&gt;' and '&quot;' respectively.
  """
  # '&' must be handled first; otherwise the ampersands introduced by the
  # other replacements would themselves be escaped again.
  s = s.replace('&', '&amp;')
  s = s.replace('<', '&lt;')
  s = s.replace('"', '&quot;')
  s = s.replace('>', '&gt;')
  return s
| 31 | |
| 32 | |
class XmlStyle(object):
  """Bundles the style rules applied when pretty-printing an XML file."""

  def __init__(self, attribute_order, tags_that_have_extra_newline,
               tags_that_dont_indent, tags_that_allow_single_line):
    """Stores the given style rules unchanged on the instance.

    Args:
      attribute_order: Mapping from tag name to the ordered attribute names
          allowed on that tag; attributes are emitted in this order.
      tags_that_have_extra_newline: Mapping from tag name to a 3-tuple of
          newline counts (after the opening tag, before the closing tag,
          after the closing tag).
      tags_that_dont_indent: Collection of tag names whose children are not
          indented relative to the tag itself.
      tags_that_allow_single_line: Collection of tag names that may be
          printed on one line when their only child fits.
    """
    self.tags_that_allow_single_line = tags_that_allow_single_line
    self.tags_that_dont_indent = tags_that_dont_indent
    self.tags_that_have_extra_newline = tags_that_have_extra_newline
    self.attribute_order = attribute_order
| 43 | |
| 44 | |
def PrettyPrintNode(node, xml_style, indent=0):
  """Pretty-prints the given XML node at the given indent level.

  Args:
    node: The minidom node to pretty-print.
    xml_style: An XmlStyle object that represents the style requirement of the
        output xml file.
    indent: The current indent level.

  Returns:
    The pretty-printed string (including embedded newlines).

  Raises:
    Error: If an element has an attribute that is not listed for its tag in
        xml_style.attribute_order, or if the node is neither a document,
        text, element nor comment node.
  """
  node_type = node.nodeType

  # Handle the top-level document node.
  if node_type == xml.dom.minidom.Node.DOCUMENT_NODE:
    return '\n'.join([PrettyPrintNode(n, xml_style) for n in node.childNodes])

  # Handle text nodes.
  if node_type == xml.dom.minidom.Node.TEXT_NODE:
    return _PrettyPrintText(node, indent)

  # Handle element nodes.
  if node_type == xml.dom.minidom.Node.ELEMENT_NODE:
    return _PrettyPrintElement(node, xml_style, indent)

  # Handle comment nodes. Comments are emitted verbatim, without re-indenting.
  if node_type == xml.dom.minidom.Node.COMMENT_NODE:
    return '<!--%s-->\n' % node.data

  # Reject other node types. This could be a processing instruction
  # (<? ... ?>) or cdata section (<![CDATA[...]]>), neither of which are legal
  # in the histograms XML at present.
  logging.error('Ignoring unrecognized node data: %s', node.toxml())
  raise Error('Unrecognized node type %s' % node_type)


def _PrettyPrintText(node, indent):
  """Escapes, dedents and re-wraps the text of a text node at |indent|."""
  # Wrap each paragraph in the text to fit in the WRAP_COLUMN limit.
  wrapper = textwrap.TextWrapper(
      width=WRAP_COLUMN,
      initial_indent=' ' * indent,
      subsequent_indent=' ' * indent,
      break_long_words=False,
      break_on_hyphens=False)
  # Remove any common indent.
  text = textwrap.dedent(XmlEscape(node.data).strip('\n'))
  # Split the text into paragraphs at blank line boundaries.
  paragraphs = [[]]
  for line in text.split('\n'):
    if not line.strip() and paragraphs[-1]:
      paragraphs.append([])
    else:
      paragraphs[-1].append(line)
  # Remove trailing empty paragraph if present.
  if not paragraphs[-1]:
    paragraphs.pop()
  # Wrap each paragraph and separate with two newlines.
  return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])


def _OrderedAttributeNames(node, xml_style):
  """Returns the element's attribute names in style order, or raises Error."""
  names = list(node.attributes.keys())
  if not names:
    return names

  if node.tagName not in xml_style.attribute_order:
    # No attributes are registered for this tag, so all of them are unknown.
    unrecognized = names
    ordered = names
  else:
    allowed = xml_style.attribute_order[node.tagName]
    unrecognized = [a for a in names if a not in allowed]
    ordered = [a for a in allowed if a in names]

  for a in unrecognized:
    logging.error('Unrecognized attribute "%s" in tag "%s"', a, node.tagName)
  if unrecognized:
    raise Error('Unrecognized attribute(s) in tag "%s": %s' %
                (node.tagName, ', '.join(unrecognized)))
  return ordered


def _AppendAttributes(s, node, attributes, indent, closing_chars):
  """Appends the attributes to the open tag in |s|, wrapping at WRAP_COLUMN."""
  for a in attributes:
    # Replace sequences of whitespace with single spaces.
    words = XmlEscape(node.attributes[a].value).split()
    a_str = ' %s="%s"' % (a, ' '.join(words))
    # Start a new line if the attribute will make this line too long.
    if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:
      s += '\n' + ' ' * (indent + 3)
    # Output everything up to the first quote.
    s += ' %s="' % a
    value_indent_level = LastLineLength(s)
    # Output one word at a time, splitting to the next line where necessary.
    column = value_indent_level
    for i, word in enumerate(words):
      # This is slightly too conservative since not every word will be
      # followed by the closing characters...
      if i > 0 and column + len(word) + 1 + closing_chars > WRAP_COLUMN:
        s = s.rstrip()  # remove any trailing whitespace
        s += '\n' + ' ' * value_indent_level
        column = value_indent_level
      s += word + ' '
      column += len(word) + 1
    s = s.rstrip()  # remove any trailing whitespace
    s += '"'
  return s.rstrip()  # remove any trailing whitespace


def _PrettyPrintElement(node, xml_style, indent):
  """Pretty-prints an element node, its attributes and its children."""
  # Per-tag newline counts; by default one newline after the opening tag, one
  # before the closing tag and none after it.
  newlines_after_open, newlines_before_close, newlines_after_close = (
      xml_style.tags_that_have_extra_newline.get(node.tagName, (1, 1, 0)))
  # Open the tag.
  s = ' ' * indent + '<' + node.tagName

  # Calculate how much space to allow for the '>' or '/>'.
  closing_chars = 1 if node.childNodes else 2

  # Pretty-print the attributes.
  attributes = _OrderedAttributeNames(node, xml_style)
  if attributes:
    s = _AppendAttributes(s, node, attributes, indent, closing_chars)

  # An element without children is self-closing.
  if not node.childNodes:
    return s + '/>' + '\n' * newlines_after_close

  s += '>'
  # Calculate the new indent level for child nodes.
  new_indent = indent
  if node.tagName not in xml_style.tags_that_dont_indent:
    new_indent += 2

  # Recursively pretty-print the child nodes, dropping whitespace-only output.
  children = [PrettyPrintNode(n, xml_style, indent=new_indent)
              for n in node.childNodes]
  children = [c for c in children if c.strip()]

  # Determine whether we can fit the entire node on a single line.
  close_tag = '</%s>' % node.tagName
  space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)
  if (node.tagName in xml_style.tags_that_allow_single_line and
      len(children) == 1 and len(children[0].strip()) <= space_left):
    s += children[0].strip()
  else:
    s += '\n' * newlines_after_open + '\n'.join(children)
    s += '\n' * newlines_before_close + ' ' * indent
  s += close_tag
  return s + '\n' * newlines_after_close
| OLD | NEW |