OLD | NEW |
| (Empty) |
1 # Copyright 2014 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 """Utilities for pretty-printing XML files. | |
6 | |
7 The function PrettyPrintNode will be used for formatting both histograms.xml | |
8 and actions.xml. | |
9 """ | |
10 | |
11 import logging | |
12 import textwrap | |
13 import xml.dom.minidom | |
14 | |
# Maximum width, in characters, of a pretty-printed output line. Used both for
# wrapping text nodes and for deciding when attributes move to a new line.
WRAP_COLUMN = 80
16 | |
17 | |
class Error(Exception):
  """Raised when the XML being pretty-printed cannot be handled."""
20 | |
21 | |
def LastLineLength(s):
  """Returns the length of the last line in s.

  Args:
    s: A multi-line string, including newlines.

  Returns:
    The length of the last line in s, in characters. If s ends with a newline
    the last line is empty and the result is 0.
  """
  # str.rfind returns -1 when s contains no newline, in which case the
  # expression below reduces to len(s), i.e. the whole string is the last line.
  return len(s) - s.rfind('\n') - len('\n')
33 | |
34 | |
def XmlEscape(s):
  """XML-escapes the given string.

  Replaces the magic characters (&<>") with their escaped equivalents.

  Args:
    s: The string to escape.

  Returns:
    A copy of s with '&', '<', '"' and '>' replaced by XML entities.
  """
  # '&' must be replaced first so that the ampersands introduced by the other
  # entities are not escaped a second time.
  s = s.replace("&", "&amp;").replace("<", "&lt;")
  s = s.replace("\"", "&quot;").replace(">", "&gt;")
  return s
41 | |
42 | |
class XmlStyle(object):
  """A class that stores all style specification for an output xml file."""

  def __init__(self, attribute_order, tags_that_have_extra_newline,
               tags_that_dont_indent, tags_that_allow_single_line):
    """Stores the style specification.

    Args:
      attribute_order: Dict mapping a tag name to the ordered list of
          attribute names permitted on that tag. Attributes are emitted in
          this order; any attribute not listed for its tag is an error.
      tags_that_have_extra_newline: Dict mapping a tag name to a tuple
          (newlines_after_open, newlines_before_close, newlines_after_close).
          Tags that are not present use (1, 1, 0).
      tags_that_dont_indent: Collection of tag names whose children are
          printed at the same indent level as the tag itself.
      tags_that_allow_single_line: Collection of tag names that may be
          printed on a single line when they have exactly one non-empty child
          that fits within the column limit.
    """
    self.attribute_order = attribute_order
    self.tags_that_have_extra_newline = tags_that_have_extra_newline
    self.tags_that_dont_indent = tags_that_dont_indent
    self.tags_that_allow_single_line = tags_that_allow_single_line

  def PrettyPrintNode(self, node, indent=0):
    """Pretty-prints the given XML node at the given indent level.

    Args:
      node: The minidom node to pretty-print.
      indent: The current indent level.

    Returns:
      The pretty-printed string (including embedded newlines).

    Raises:
      Error: If an element carries an attribute that is not listed for its
          tag in attribute_order, or if the node is neither a document, text,
          element nor comment node.
    """
    # Handle the top-level document node.
    if node.nodeType == xml.dom.minidom.Node.DOCUMENT_NODE:
      return '\n'.join([self.PrettyPrintNode(n) for n in node.childNodes])

    # Handle text nodes.
    if node.nodeType == xml.dom.minidom.Node.TEXT_NODE:
      # Wrap each paragraph in the text to fit in the WRAP_COLUMN limit.
      wrapper = textwrap.TextWrapper()
      wrapper.initial_indent = ' ' * indent
      wrapper.subsequent_indent = ' ' * indent
      wrapper.break_on_hyphens = False
      wrapper.break_long_words = False
      wrapper.width = WRAP_COLUMN
      text = XmlEscape(node.data)
      # Remove any common indent.
      text = textwrap.dedent(text.strip('\n'))
      lines = text.split('\n')
      # Split the text into paragraphs at blank line boundaries. A blank line
      # only starts a new paragraph when the current one already has content.
      paragraphs = [[]]
      for line in lines:
        if not line.strip() and paragraphs[-1]:
          paragraphs.append([])
        else:
          paragraphs[-1].append(line)
      # Remove trailing empty paragraph if present.
      if paragraphs and not paragraphs[-1]:
        paragraphs = paragraphs[:-1]
      # Wrap each paragraph and separate with two newlines.
      return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])

    # Handle element nodes.
    if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
      newlines_after_open, newlines_before_close, newlines_after_close = (
          self.tags_that_have_extra_newline.get(node.tagName, (1, 1, 0)))
      # Open the tag.
      s = ' ' * indent + '<' + node.tagName

      # Calculate how much space to allow for the '>' or '/>'.
      closing_chars = 1
      if not node.childNodes:
        closing_chars = 2

      # Pretty-print the attributes.
      attributes = list(node.attributes.keys())
      if attributes:
        # Reorder the attributes.
        if node.tagName not in self.attribute_order:
          unrecognized_attributes = attributes
        else:
          allowed = self.attribute_order[node.tagName]
          unrecognized_attributes = [a for a in attributes if a not in allowed]
          attributes = [a for a in allowed if a in attributes]

        if unrecognized_attributes:
          for a in unrecognized_attributes:
            logging.error(
                'Unrecognized attribute "%s" in tag "%s"', a, node.tagName)
          raise Error('Unrecognized attribute(s) %s in tag "%s"' % (
              ', '.join('"%s"' % a for a in unrecognized_attributes),
              node.tagName))

        for a in attributes:
          value = XmlEscape(node.attributes[a].value)
          # Replace sequences of whitespace with single spaces.
          words = value.split()
          a_str = ' %s="%s"' % (a, ' '.join(words))
          # Start a new line if the attribute will make this line too long.
          if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:
            s += '\n' + ' ' * (indent + 3)
          # Output everything up to the first quote.
          s += ' %s="' % (a)
          value_indent_level = LastLineLength(s)
          # Output one word at a time, splitting to the next line where
          # necessary.
          column = value_indent_level
          for i, word in enumerate(words):
            # This is slightly too conservative since not every word will be
            # followed by the closing characters...
            if i > 0 and (column + len(word) + 1 + closing_chars > WRAP_COLUMN):
              s = s.rstrip()  # remove any trailing whitespace
              s += '\n' + ' ' * value_indent_level
              column = value_indent_level
            s += word + ' '
            column += len(word) + 1
          s = s.rstrip()  # remove any trailing whitespace
          s += '"'

      # Pretty-print the child nodes.
      if node.childNodes:
        s += '>'
        # Calculate the new indent level for child nodes.
        new_indent = indent
        if node.tagName not in self.tags_that_dont_indent:
          new_indent += 2

        # Recursively pretty-print the child nodes, dropping any that render
        # to nothing but whitespace.
        child_nodes = [self.PrettyPrintNode(n, indent=new_indent)
                       for n in node.childNodes]
        child_nodes = [c for c in child_nodes if c.strip()]

        # Determine whether we can fit the entire node on a single line.
        close_tag = '</%s>' % node.tagName
        space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)
        if (node.tagName in self.tags_that_allow_single_line and
            len(child_nodes) == 1 and
            len(child_nodes[0].strip()) <= space_left):
          s += child_nodes[0].strip()
        else:
          s += '\n' * newlines_after_open + '\n'.join(child_nodes)
          s += '\n' * newlines_before_close + ' ' * indent
        s += close_tag
      else:
        s += '/>'
      s += '\n' * newlines_after_close
      return s

    # Handle comment nodes. Note that comments are emitted without the
    # current indent and always followed by a newline.
    if node.nodeType == xml.dom.minidom.Node.COMMENT_NODE:
      return '<!--%s-->\n' % node.data

    # Reject other node types. This could be a processing instruction
    # (<? ... ?>) or cdata section (<![CDATA[...]]>), neither of which are
    # legal in the histograms XML at present.
    node_xml = node.toxml()
    logging.error('Ignoring unrecognized node data: %s', node_xml)
    raise Error('Unrecognized node data: %s' % node_xml)
OLD | NEW |