OLD | NEW |
(Empty) | |
| 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 |
"""Utilities for pretty-printing XML files.

The method XmlStyle.PrettyPrintNode is used for formatting both histograms.xml
and actions.xml.
"""
| 10 |
| 11 import logging |
| 12 import textwrap |
| 13 import xml.dom.minidom |
| 14 |
# Maximum width, in columns, that pretty-printed text and attribute lists are
# wrapped to.
WRAP_COLUMN = 80
| 16 |
| 17 |
class Error(Exception):
  """Raised when the XML being pretty-printed contains unsupported content."""
| 20 |
| 21 |
def LastLineLength(s):
  """Returns the length of the last line in s.

  Args:
    s: A multi-line string, including newlines.

  Returns:
    The length of the last line in s, in characters. This is 0 when s is
    empty or ends with a newline.
  """
  # rfind() returns -1 when s contains no newline, in which case the
  # expression below reduces to len(s), so no special case is needed.
  return len(s) - s.rfind('\n') - len('\n')
| 33 |
| 34 |
def XmlEscape(s):
  """XML-escapes the given string.

  Replaces the magic characters (&<>") with their escaped equivalents.

  Args:
    s: The string to escape.

  Returns:
    A copy of s with &, <, " and > replaced by XML entities.
  """
  # '&' must be replaced first so that the ampersands introduced by the other
  # replacements are not escaped a second time.
  s = s.replace("&", "&amp;").replace("<", "&lt;")
  s = s.replace("\"", "&quot;").replace(">", "&gt;")
  return s
| 41 |
| 42 |
class XmlStyle(object):
  """A class that stores all style specification for an output xml file."""

  def __init__(self, attribute_order, tags_that_have_extra_newline,
               tags_that_dont_indent, tags_that_allow_single_line):
    """Stores the style specification.

    Args:
      attribute_order: Maps a tag name to the ordered collection of attribute
          names accepted on that tag; attributes are emitted in this order.
      tags_that_have_extra_newline: Maps a tag name to a tuple of
          (newlines_after_open, newlines_before_close, newlines_after_close).
          Tags that are not listed use (1, 1, 0).
      tags_that_dont_indent: Collection of tag names whose children are not
          indented.
      tags_that_allow_single_line: Collection of tag names that may be
          emitted on one line when their single child fits.
    """
    # Per-tag ordered attribute names.
    self.attribute_order = attribute_order
    # Per-tag newline counts around the open and close tags.
    self.tags_that_have_extra_newline = tags_that_have_extra_newline
    # Tag names whose children are not indented.
    self.tags_that_dont_indent = tags_that_dont_indent
    # Tag names that may be collapsed onto a single line.
    self.tags_that_allow_single_line = tags_that_allow_single_line

  def PrettyPrintNode(self, node, indent=0):
    """Pretty-prints the given XML node at the given indent level.

    Args:
      node: The minidom node to pretty-print.
      indent: The current indent level.

    Returns:
      The pretty-printed string (including embedded newlines).

    Raises:
      Error if the XML has unrecognized attributes or contains a node type
      other than document, text, element or comment.
    """
    node_types = xml.dom.minidom.Node

    # Handle the top-level document node.
    if node.nodeType == node_types.DOCUMENT_NODE:
      return '\n'.join([self.PrettyPrintNode(n) for n in node.childNodes])

    if node.nodeType == node_types.TEXT_NODE:
      return self._PrettyPrintText(node, indent)

    if node.nodeType == node_types.ELEMENT_NODE:
      return self._PrettyPrintElement(node, indent)

    # Handle comment nodes. The comment is emitted as-is, without indent.
    if node.nodeType == node_types.COMMENT_NODE:
      return '<!--%s-->\n' % node.data

    # Reject other node types. This could be a processing instruction
    # (<? ... ?>) or cdata section (<![CDATA[...]]>), neither of which are
    # legal in the histograms XML at present.
    logging.error('Ignoring unrecognized node data: %s', node.toxml())
    raise Error('Unrecognized node type: %s' % node.nodeType)

  def _PrettyPrintText(self, node, indent):
    """Wraps the text of a text node to WRAP_COLUMN at the given indent."""
    # Wrap each paragraph in the text to fit in the column limit. Long words
    # are never split, so a single over-long word may exceed the limit.
    wrapper = textwrap.TextWrapper(
        width=WRAP_COLUMN,
        initial_indent=' ' * indent,
        subsequent_indent=' ' * indent,
        break_long_words=False,
        break_on_hyphens=False)
    text = XmlEscape(node.data)
    # Remove any common indent.
    text = textwrap.dedent(text.strip('\n'))
    # Split the text into paragraphs at blank line boundaries.
    paragraphs = [[]]
    for line in text.split('\n'):
      if not line.strip() and paragraphs[-1]:
        paragraphs.append([])
      else:
        paragraphs[-1].append(line)
    # Remove trailing empty paragraph if present.
    if paragraphs and not paragraphs[-1]:
      paragraphs = paragraphs[:-1]
    # Wrap each paragraph and separate with two newlines.
    return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])

  def _GetOrderedAttributes(self, node):
    """Returns the attribute names of node in the configured output order.

    Raises:
      Error if node has an attribute that is not listed in attribute_order
      for its tag (each offending attribute is logged first).
    """
    # Copy into a list so the names can be tested and re-used regardless of
    # what kind of collection keys() returns.
    attributes = list(node.attributes.keys())
    if not attributes:
      return attributes

    if node.tagName not in self.attribute_order:
      unrecognized_attributes = attributes
    else:
      allowed = self.attribute_order[node.tagName]
      unrecognized_attributes = [a for a in attributes if a not in allowed]
      attributes = [a for a in allowed if a in attributes]

    for a in unrecognized_attributes:
      logging.error('Unrecognized attribute "%s" in tag "%s"', a, node.tagName)
    if unrecognized_attributes:
      raise Error('Unrecognized attribute(s) %s in tag "%s"' %
                  (', '.join(['"%s"' % a for a in unrecognized_attributes]),
                   node.tagName))
    return attributes

  def _PrettyPrintElement(self, node, indent):
    """Pretty-prints an element node, its attributes and its children."""
    newlines_after_open, newlines_before_close, newlines_after_close = (
        self.tags_that_have_extra_newline.get(node.tagName, (1, 1, 0)))
    # Open the tag.
    s = ' ' * indent + '<' + node.tagName

    # Calculate how much space to allow for the '>' or '/>'.
    closing_chars = 1 if node.childNodes else 2

    # Pretty-print the attributes.
    for a in self._GetOrderedAttributes(node):
      value = XmlEscape(node.attributes[a].value)
      # Replace sequences of whitespace with single spaces.
      words = value.split()
      a_str = ' %s="%s"' % (a, ' '.join(words))
      # Start a new line if the attribute will make this line too long.
      if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:
        s += '\n' + ' ' * (indent + 3)
      # Output everything up to the first quote.
      s += ' %s="' % (a)
      value_indent_level = LastLineLength(s)
      # Output one word at a time, splitting to the next line where
      # necessary.
      column = value_indent_level
      for i, word in enumerate(words):
        # This is slightly too conservative since not every word will be
        # followed by the closing characters...
        if i > 0 and (column + len(word) + 1 + closing_chars > WRAP_COLUMN):
          s = s.rstrip()  # remove any trailing whitespace
          s += '\n' + ' ' * value_indent_level
          column = value_indent_level
        s += word + ' '
        column += len(word) + 1
      s = s.rstrip()  # remove the space appended after the last word
      s += '"'
    s = s.rstrip()  # remove any trailing whitespace

    # Pretty-print the child nodes.
    if node.childNodes:
      s += '>'
      # Calculate the new indent level for child nodes.
      new_indent = indent
      if node.tagName not in self.tags_that_dont_indent:
        new_indent += 2

      # Recursively pretty-print the child nodes, dropping children that
      # render as whitespace only.
      rendered = [self.PrettyPrintNode(n, indent=new_indent)
                  for n in node.childNodes]
      rendered = [c for c in rendered if c.strip()]

      # Determine whether we can fit the entire node on a single line.
      close_tag = '</%s>' % node.tagName
      space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)
      if (node.tagName in self.tags_that_allow_single_line and
          len(rendered) == 1 and
          len(rendered[0].strip()) <= space_left):
        s += rendered[0].strip()
      else:
        s += '\n' * newlines_after_open + '\n'.join(rendered)
        s += '\n' * newlines_before_close + ' ' * indent
      s += close_tag
    else:
      s += '/>'
    s += '\n' * newlines_after_close
    return s
OLD | NEW |