OLD | NEW |
---|---|
1 #!/usr/bin/env python | |
2 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 3 # found in the LICENSE file. |
5 | 4 |
6 """Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text | 5 """Utility file for pretty print xml file.""" |
Alexei Svitkine (slow)
2014/02/03 19:37:34
This description should be more clear, otherwise i
yao
2014/02/04 19:08:12
Done.
| |
7 at 80 chars, enforcing standard attribute ordering, and standardizing | |
8 indentation. | |
9 | 6 |
10 This is quite a bit more complicated than just calling tree.toprettyxml(); | |
11 we need additional customization, like special attribute ordering in tags | |
12 and wrapping text nodes, so we implement our own full custom XML pretty-printer. | |
13 """ | |
14 | |
15 from __future__ import with_statement | |
16 | |
17 import diffutil | |
18 import json | |
19 import logging | 7 import logging |
20 import os | |
21 import shutil | |
22 import sys | |
23 import textwrap | 8 import textwrap |
24 import xml.dom.minidom | 9 import xml.dom.minidom |
25 | 10 |
26 sys.path.insert(1, os.path.join(sys.path[0], '..', '..', 'python')) | |
27 from google import path_utils | |
28 | |
29 WRAP_COLUMN = 80 | 11 WRAP_COLUMN = 80 |
30 | 12 |
31 # Desired order for tag attributes; attributes listed here will appear first, | |
32 # and in the same order as in these lists. | |
33 # { tag_name: [attribute_name, ...] } | |
34 ATTRIBUTE_ORDER = { | |
35 'enum': ['name', 'type'], | |
36 'histogram': ['name', 'enum', 'units'], | |
37 'int': ['value', 'label'], | |
38 'fieldtrial': ['name', 'separator', 'ordering'], | |
39 'group': ['name', 'label'], | |
40 'affected-histogram': ['name'], | |
41 'with-group': ['name'], | |
42 } | |
43 | |
44 # Tag names for top-level nodes whose children we don't want to indent. | |
45 TAGS_THAT_DONT_INDENT = [ | |
46 'histogram-configuration', | |
47 'histograms', | |
48 'fieldtrials', | |
49 'enums' | |
50 ] | |
51 | |
52 # Extra vertical spacing rules for special tag names. | |
53 # {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)} | |
54 TAGS_THAT_HAVE_EXTRA_NEWLINE = { | |
55 'histogram-configuration': (2, 1, 1), | |
56 'histograms': (2, 1, 1), | |
57 'fieldtrials': (2, 1, 1), | |
58 'enums': (2, 1, 1), | |
59 'histogram': (1, 1, 1), | |
60 'enum': (1, 1, 1), | |
61 'fieldtrial': (1, 1, 1), | |
62 } | |
63 | |
64 # Tags that we allow to be squished into a single line for brevity. | |
65 TAGS_THAT_ALLOW_SINGLE_LINE = [ | |
66 'summary', | |
67 'int', | |
68 ] | |
69 | |
70 # Tags whose children we want to alphabetize. The key is the parent tag name, | |
71 # and the value is a pair of the tag name of the children we want to sort, | |
72 # and a key function that maps each child node to the desired sort key. | |
73 ALPHABETIZATION_RULES = { | |
74 'histograms': ('histogram', lambda n: n.attributes['name'].value.lower()), | |
75 'enums': ('enum', lambda n: n.attributes['name'].value.lower()), | |
76 'enum': ('int', lambda n: int(n.attributes['value'].value)), | |
77 'fieldtrials': ('fieldtrial', lambda n: n.attributes['name'].value.lower()), | |
78 'fieldtrial': ('affected-histogram', | |
79 lambda n: n.attributes['name'].value.lower()), | |
80 } | |
81 | |
82 | 13 |
83 class Error(Exception): | 14 class Error(Exception): |
84 pass | 15 pass |
85 | 16 |
86 | 17 |
87 def LastLineLength(s): | 18 def LastLineLength(s): |
88 """Returns the length of the last line in s. | 19 """Returns the length of the last line in s. |
89 | 20 |
90 Args: | 21 Args: |
91 s: A multi-line string, including newlines. | 22 s: A multi-line string, including newlines. |
92 | 23 |
93 Returns: | 24 Returns: |
94 The length of the last line in s, in characters. | 25 The length of the last line in s, in characters. |
95 """ | 26 """ |
96 if s.rfind('\n') == -1: return len(s) | 27 if s.rfind('\n') == -1: return len(s) |
97 return len(s) - s.rfind('\n') - len('\n') | 28 return len(s) - s.rfind('\n') - len('\n') |
98 | 29 |
99 | 30 |
100 def XmlEscape(s): | 31 def XmlEscape(s): |
101 """XML-escapes the given string, replacing magic characters (&<>") with their | 32 """XML-escapes the given string, replacing magic characters (&<>") with their |
102 escaped equivalents.""" | 33 escaped equivalents.""" |
103 s = s.replace("&", "&amp;").replace("<", "&lt;") | 34 s = s.replace("&", "&amp;").replace("<", "&lt;") |
104 s = s.replace("\"", "&quot;").replace(">", "&gt;") | 35 s = s.replace("\"", "&quot;").replace(">", "&gt;") |
105 return s | 36 return s |
106 | 37 |
107 | 38 |
108 def PrettyPrintNode(node, indent=0): | 39 class XmlStyle(object): |
40 """A class that stores all style specification for an output xml file.""" | |
41 | |
42 def __init__(self, attribute_order, tags_that_have_extra_newline, | |
43 tags_that_dont_indent, tags_that_allow_single_line): | |
44 # Keep the caller-supplied style tables; PrettyPrintNode reads them. | |
45 self.attribute_order = attribute_order | |
46 self.tags_that_have_extra_newline = tags_that_have_extra_newline | |
47 self.tags_that_dont_indent = tags_that_dont_indent | |
48 self.tags_that_allow_single_line = tags_that_allow_single_line | |
49 | |
50 | |
51 def PrettyPrintNode(node, xml_style, indent=0): | |
Alexei Svitkine (slow)
2014/02/03 19:37:34
Now that you have the XmlStyle object, can you mak
yao
2014/02/04 19:08:12
Done.
| |
109 """Pretty-prints the given XML node at the given indent level. | 52 """Pretty-prints the given XML node at the given indent level. |
110 | 53 |
111 Args: | 54 Args: |
112 node: The minidom node to pretty-print. | 55 node: The minidom node to pretty-print. |
56 xml_style: An XmlStyle object that represents the style requirement of the | |
57 output xml file. | |
113 indent: The current indent level. | 58 indent: The current indent level. |
114 | 59 |
115 Returns: | 60 Returns: |
116 The pretty-printed string (including embedded newlines). | 61 The pretty-printed string (including embedded newlines). |
117 | 62 |
118 Raises: | 63 Raises: |
119 Error if the XML has unknown tags or attributes. | 64 Error if the XML has unknown tags or attributes. |
120 """ | 65 """ |
121 # Handle the top-level document node. | 66 # Handle the top-level document node. |
122 if node.nodeType == xml.dom.minidom.Node.DOCUMENT_NODE: | 67 if node.nodeType == xml.dom.minidom.Node.DOCUMENT_NODE: |
123 return '\n'.join([PrettyPrintNode(n) for n in node.childNodes]) | 68 return '\n'.join([PrettyPrintNode(n, xml_style) for n in node.childNodes]) |
124 | 69 |
125 # Handle text nodes. | 70 # Handle text nodes. |
126 if node.nodeType == xml.dom.minidom.Node.TEXT_NODE: | 71 if node.nodeType == xml.dom.minidom.Node.TEXT_NODE: |
127 # Wrap each paragraph in the text to fit in the 80 column limit. | 72 # Wrap each paragraph in the text to fit in the 80 column limit. |
128 wrapper = textwrap.TextWrapper() | 73 wrapper = textwrap.TextWrapper() |
129 wrapper.initial_indent = ' ' * indent | 74 wrapper.initial_indent = ' ' * indent |
130 wrapper.subsequent_indent = ' ' * indent | 75 wrapper.subsequent_indent = ' ' * indent |
131 wrapper.break_on_hyphens = False | 76 wrapper.break_on_hyphens = False |
132 wrapper.break_long_words = False | 77 wrapper.break_long_words = False |
133 wrapper.width = WRAP_COLUMN | 78 wrapper.width = WRAP_COLUMN |
(...skipping 10 matching lines...) Expand all Loading... | |
144 paragraphs[-1].append(l) | 89 paragraphs[-1].append(l) |
145 # Remove trailing empty paragraph if present. | 90 # Remove trailing empty paragraph if present. |
146 if len(paragraphs) > 0 and len(paragraphs[-1]) == 0: | 91 if len(paragraphs) > 0 and len(paragraphs[-1]) == 0: |
147 paragraphs = paragraphs[:-1] | 92 paragraphs = paragraphs[:-1] |
148 # Wrap each paragraph and separate with two newlines. | 93 # Wrap each paragraph and separate with two newlines. |
149 return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs]) | 94 return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs]) |
150 | 95 |
151 # Handle element nodes. | 96 # Handle element nodes. |
152 if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE: | 97 if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE: |
153 newlines_after_open, newlines_before_close, newlines_after_close = ( | 98 newlines_after_open, newlines_before_close, newlines_after_close = ( |
154 TAGS_THAT_HAVE_EXTRA_NEWLINE.get(node.tagName, (1, 1, 0))) | 99 xml_style.tags_that_have_extra_newline.get(node.tagName, (1, 1, 0))) |
155 # Open the tag. | 100 # Open the tag. |
156 s = ' ' * indent + '<' + node.tagName | 101 s = ' ' * indent + '<' + node.tagName |
157 | 102 |
158 # Calculate how much space to allow for the '>' or '/>'. | 103 # Calculate how much space to allow for the '>' or '/>'. |
159 closing_chars = 1 | 104 closing_chars = 1 |
160 if not node.childNodes: | 105 if not node.childNodes: |
161 closing_chars = 2 | 106 closing_chars = 2 |
162 | 107 |
163 # Pretty-print the attributes. | 108 # Pretty-print the attributes. |
164 attributes = node.attributes.keys() | 109 attributes = node.attributes.keys() |
165 if attributes: | 110 if attributes: |
166 # Reorder the attributes. | 111 # Reorder the attributes. |
167 if not node.tagName in ATTRIBUTE_ORDER: | 112 if node.tagName not in xml_style.attribute_order: |
168 unrecognized_attributes = attributes; | 113 unrecognized_attributes = attributes |
169 else: | 114 else: |
170 unrecognized_attributes = ( | 115 unrecognized_attributes = ( |
171 [a for a in attributes if not a in ATTRIBUTE_ORDER[node.tagName]]) | 116 [a for a in attributes |
172 attributes = ( | 117 if a not in xml_style.attribute_order[node.tagName]]) |
173 [a for a in ATTRIBUTE_ORDER[node.tagName] if a in attributes]) | 118 attributes = [a for a in xml_style.attribute_order[node.tagName] |
119 if a in attributes] | |
174 | 120 |
175 for a in unrecognized_attributes: | 121 for a in unrecognized_attributes: |
176 logging.error( | 122 logging.error( |
177 'Unrecognized attribute "%s" in tag "%s"' % (a, node.tagName)) | 123 'Unrecognized attribute "%s" in tag "%s"' % (a, node.tagName)) |
178 if unrecognized_attributes: | 124 if unrecognized_attributes: |
179 raise Error() | 125 raise Error() |
180 | 126 |
181 for a in attributes: | 127 for a in attributes: |
182 value = XmlEscape(node.attributes[a].value) | 128 value = XmlEscape(node.attributes[a].value) |
183 # Replace sequences of whitespace with single spaces. | 129 # Replace sequences of whitespace with single spaces. |
(...skipping 18 matching lines...) Expand all Loading... | |
202 column += len(word) + 1 | 148 column += len(word) + 1 |
203 s = s.rstrip() # remove any trailing whitespace | 149 s = s.rstrip() # remove any trailing whitespace |
204 s += '"' | 150 s += '"' |
205 s = s.rstrip() # remove any trailing whitespace | 151 s = s.rstrip() # remove any trailing whitespace |
206 | 152 |
207 # Pretty-print the child nodes. | 153 # Pretty-print the child nodes. |
208 if node.childNodes: | 154 if node.childNodes: |
209 s += '>' | 155 s += '>' |
210 # Calculate the new indent level for child nodes. | 156 # Calculate the new indent level for child nodes. |
211 new_indent = indent | 157 new_indent = indent |
212 if node.tagName not in TAGS_THAT_DONT_INDENT: | 158 if node.tagName not in xml_style.tags_that_dont_indent: |
213 new_indent += 2 | 159 new_indent += 2 |
214 child_nodes = node.childNodes | 160 child_nodes = node.childNodes |
215 | 161 |
216 # Recursively pretty-print the child nodes. | 162 # Recursively pretty-print the child nodes. |
217 child_nodes = [PrettyPrintNode(n, indent=new_indent) for n in child_nodes] | 163 child_nodes = [PrettyPrintNode(n, xml_style, indent=new_indent) |
164 for n in child_nodes] | |
218 child_nodes = [c for c in child_nodes if len(c.strip()) > 0] | 165 child_nodes = [c for c in child_nodes if len(c.strip()) > 0] |
219 | 166 |
220 # Determine whether we can fit the entire node on a single line. | 167 # Determine whether we can fit the entire node on a single line. |
221 close_tag = '</%s>' % node.tagName | 168 close_tag = '</%s>' % node.tagName |
222 space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag) | 169 space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag) |
223 if (node.tagName in TAGS_THAT_ALLOW_SINGLE_LINE and | 170 if (node.tagName in xml_style.tags_that_allow_single_line and |
224 len(child_nodes) == 1 and len(child_nodes[0].strip()) <= space_left): | 171 len(child_nodes) == 1 and len(child_nodes[0].strip()) <= space_left): |
225 s += child_nodes[0].strip() | 172 s += child_nodes[0].strip() |
226 else: | 173 else: |
227 s += '\n' * newlines_after_open + '\n'.join(child_nodes) | 174 s += '\n' * newlines_after_open + '\n'.join(child_nodes) |
228 s += '\n' * newlines_before_close + ' ' * indent | 175 s += '\n' * newlines_before_close + ' ' * indent |
229 s += close_tag | 176 s += close_tag |
230 else: | 177 else: |
231 s += '/>' | 178 s += '/>' |
232 s += '\n' * newlines_after_close | 179 s += '\n' * newlines_after_close |
233 return s | 180 return s |
234 | 181 |
235 # Handle comment nodes. | 182 # Handle comment nodes. |
236 if node.nodeType == xml.dom.minidom.Node.COMMENT_NODE: | 183 if node.nodeType == xml.dom.minidom.Node.COMMENT_NODE: |
237 return '<!--%s-->\n' % node.data | 184 return '<!--%s-->\n' % node.data |
238 | 185 |
239 # Ignore other node types. This could be a processing instruction (<? ... ?>) | 186 # Ignore other node types. This could be a processing instruction (<? ... ?>) |
240 # or cdata section (<![CDATA[...]]>), neither of which are legal in the | 187 # or cdata section (<![CDATA[...]]>), neither of which are legal in the |
241 # histograms XML at present. | 188 # histograms XML at present. |
242 logging.error('Ignoring unrecognized node data: %s' % node.toxml()) | 189 logging.error('Ignoring unrecognized node data: %s' % node.toxml()) |
243 raise Error() | 190 raise Error() |
244 | |
245 | |
246 def unsafeAppendChild(parent, child): | |
247 """Append child to parent's list of children, ignoring the possibility that it | |
248 is already in another node's childNodes list. Requires that the previous | |
249 parent of child is discarded (to avoid non-tree DOM graphs). | |
250 This can provide a significant speedup as O(n^2) operations are removed (in | |
251 particular, each child insertion avoids the need to traverse the old parent's | |
252 entire list of children).""" | |
253 child.parentNode = None | |
254 parent.appendChild(child) | |
255 child.parentNode = parent | |
256 | |
257 | |
258 def TransformByAlphabetizing(node): | |
259 """Transform the given XML by alphabetizing specific node types according to | |
260 the rules in ALPHABETIZATION_RULES. | |
261 | |
262 Args: | |
263 node: The minidom node to transform. | |
264 | |
265 Returns: | |
266 The minidom node, with children appropriately alphabetized. Note that the | |
267 transformation is done in-place, i.e. the original minidom tree is modified | |
268 directly. | |
269 """ | |
270 if node.nodeType != xml.dom.minidom.Node.ELEMENT_NODE: | |
271 for c in node.childNodes: TransformByAlphabetizing(c) | |
272 return node | |
273 | |
274 # Element node with a tag name that we alphabetize the children of? | |
275 if node.tagName in ALPHABETIZATION_RULES: | |
276 # Put subnodes in a list of node,key pairs to allow for custom sorting. | |
277 subtag, key_function = ALPHABETIZATION_RULES[node.tagName] | |
278 subnodes = [] | |
279 last_key = -1 | |
280 for c in node.childNodes: | |
281 if (c.nodeType == xml.dom.minidom.Node.ELEMENT_NODE and | |
282 c.tagName == subtag): | |
283 last_key = key_function(c) | |
284 # Subnodes that we don't want to rearrange use the last node's key, | |
285 # so they stay in the same relative position. | |
286 subnodes.append( (c, last_key) ) | |
287 | |
288 # Sort the subnode list. | |
289 subnodes.sort(key=lambda pair: pair[1]) | |
290 | |
291 # Re-add the subnodes, transforming each recursively. | |
292 while node.firstChild: | |
293 node.removeChild(node.firstChild) | |
294 for (c, _) in subnodes: | |
295 unsafeAppendChild(node, TransformByAlphabetizing(c)) | |
296 return node | |
297 | |
298 # Recursively handle other element nodes and other node types. | |
299 for c in node.childNodes: TransformByAlphabetizing(c) | |
300 return node | |
301 | |
302 | |
303 def PrettyPrint(raw_xml): | |
304 """Pretty-print the given XML. | |
305 | |
306 Args: | |
307 raw_xml: The contents of the histograms XML file, as a string. | |
308 | |
309 Returns: | |
310 The pretty-printed version. | |
311 """ | |
312 tree = xml.dom.minidom.parseString(raw_xml) | |
313 tree = TransformByAlphabetizing(tree) | |
314 return PrettyPrintNode(tree) | |
315 | |
316 | |
317 def main(): | |
318 logging.basicConfig(level=logging.INFO) | |
319 | |
320 presubmit = ('--presubmit' in sys.argv) | |
321 | |
322 histograms_filename = 'histograms.xml' | |
323 histograms_backup_filename = 'histograms.before.pretty-print.xml' | |
324 | |
325 script_dir = path_utils.ScriptDir() | |
326 | |
327 histograms_pathname = os.path.join(script_dir, histograms_filename) | |
328 histograms_backup_pathname = os.path.join(script_dir, | |
329 histograms_backup_filename) | |
330 | |
331 logging.info('Loading %s...' % histograms_filename) | |
332 with open(histograms_pathname, 'rb') as f: | |
333 xml = f.read() | |
334 | |
335 # Check there are no CR ('\r') characters in the file. | |
336 if '\r' in xml: | |
337 logging.info('DOS-style line endings (CR characters) detected - these are ' | |
338 'not allowed. Please run dos2unix %s' % histograms_filename) | |
339 sys.exit(1) | |
340 | |
341 logging.info('Pretty-printing...') | |
342 try: | |
343 pretty = PrettyPrint(xml) | |
344 except Error: | |
345 logging.error('Aborting parsing due to fatal errors.') | |
346 sys.exit(1) | |
347 | |
348 if xml == pretty: | |
349 logging.info('%s is correctly pretty-printed.' % histograms_filename) | |
350 sys.exit(0) | |
351 if presubmit: | |
352 logging.info('%s is not formatted correctly; run pretty_print.py to fix.' % | |
353 histograms_filename) | |
354 sys.exit(1) | |
355 if not diffutil.PromptUserToAcceptDiff( | |
356 xml, pretty, | |
357 'Is the prettified version acceptable?'): | |
358 logging.error('Aborting') | |
359 return | |
360 | |
361 logging.info('Creating backup file %s' % histograms_backup_filename) | |
362 shutil.move(histograms_pathname, histograms_backup_pathname) | |
363 | |
364 logging.info('Writing new %s file' % histograms_filename) | |
365 with open(histograms_pathname, 'wb') as f: | |
366 f.write(pretty) | |
367 | |
368 | |
369 if __name__ == '__main__': | |
370 main() | |
OLD | NEW |