OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright 2013 The Chromium Authors. All rights reserved. | 2 # Copyright 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text | 6 """Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text |
7 at 80 chars, enforcing standard attribute ordering, and standardizing | 7 at 80 chars, enforcing standard attribute ordering, and standardizing |
8 indentation. | 8 indentation. |
9 | 9 |
10 This is quite a bit more complicated than just calling tree.toprettyxml(); | 10 This is quite a bit more complicated than just calling tree.toprettyxml(); |
11 we need additional customization, like special attribute ordering in tags | 11 we need additional customization, like special attribute ordering in tags |
12 and wrapping text nodes, so we implement our own full custom XML pretty-printer. | 12 and wrapping text nodes, so we implement our own full custom XML pretty-printer. |
13 """ | 13 """ |
14 | 14 |
15 from __future__ import with_statement | 15 from __future__ import with_statement |
16 | 16 |
17 import diffutil | |
18 import json | |
19 import logging | 17 import logging |
20 import os | 18 import os |
21 import shutil | 19 import shutil |
22 import sys | 20 import sys |
23 import textwrap | |
24 import xml.dom.minidom | 21 import xml.dom.minidom |
25 | 22 |
| 23 import print_style |
| 24 |
26 sys.path.insert(1, os.path.join(sys.path[0], '..', '..', 'python')) | 25 sys.path.insert(1, os.path.join(sys.path[0], '..', '..', 'python')) |
27 from google import path_utils | 26 from google import path_utils |
28 | 27 |
29 WRAP_COLUMN = 80 | 28 # Import the metrics/common module. |
30 | 29 sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common')) |
31 # Desired order for tag attributes; attributes listed here will appear first, | 30 import diff_util |
32 # and in the same order as in these lists. | |
33 # { tag_name: [attribute_name, ...] } | |
34 ATTRIBUTE_ORDER = { | |
35 'enum': ['name', 'type'], | |
36 'histogram': ['name', 'enum', 'units'], | |
37 'int': ['value', 'label'], | |
38 'fieldtrial': ['name', 'separator', 'ordering'], | |
39 'group': ['name', 'label'], | |
40 'affected-histogram': ['name'], | |
41 'with-group': ['name'], | |
42 } | |
43 | |
44 # Tag names for top-level nodes whose children we don't want to indent. | |
45 TAGS_THAT_DONT_INDENT = [ | |
46 'histogram-configuration', | |
47 'histograms', | |
48 'fieldtrials', | |
49 'enums' | |
50 ] | |
51 | |
52 # Extra vertical spacing rules for special tag names. | |
53 # {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)} | |
54 TAGS_THAT_HAVE_EXTRA_NEWLINE = { | |
55 'histogram-configuration': (2, 1, 1), | |
56 'histograms': (2, 1, 1), | |
57 'fieldtrials': (2, 1, 1), | |
58 'enums': (2, 1, 1), | |
59 'histogram': (1, 1, 1), | |
60 'enum': (1, 1, 1), | |
61 'fieldtrial': (1, 1, 1), | |
62 } | |
63 | |
64 # Tags that we allow to be squished into a single line for brevity. | |
65 TAGS_THAT_ALLOW_SINGLE_LINE = [ | |
66 'summary', | |
67 'int', | |
68 ] | |
69 | 31 |
70 # Tags whose children we want to alphabetize. The key is the parent tag name, | 32 # Tags whose children we want to alphabetize. The key is the parent tag name, |
71 # and the value is a pair of the tag name of the children we want to sort, | 33 # and the value is a pair of the tag name of the children we want to sort, |
72 # and a key function that maps each child node to the desired sort key. | 34 # and a key function that maps each child node to the desired sort key. |
73 ALPHABETIZATION_RULES = { | 35 ALPHABETIZATION_RULES = { |
74 'histograms': ('histogram', lambda n: n.attributes['name'].value.lower()), | 36 'histograms': ('histogram', lambda n: n.attributes['name'].value.lower()), |
75 'enums': ('enum', lambda n: n.attributes['name'].value.lower()), | 37 'enums': ('enum', lambda n: n.attributes['name'].value.lower()), |
76 'enum': ('int', lambda n: int(n.attributes['value'].value)), | 38 'enum': ('int', lambda n: int(n.attributes['value'].value)), |
77 'fieldtrials': ('fieldtrial', lambda n: n.attributes['name'].value.lower()), | 39 'fieldtrials': ('fieldtrial', lambda n: n.attributes['name'].value.lower()), |
78 'fieldtrial': ('affected-histogram', | 40 'fieldtrial': ('affected-histogram', |
79 lambda n: n.attributes['name'].value.lower()), | 41 lambda n: n.attributes['name'].value.lower()), |
80 } | 42 } |
81 | 43 |
82 | 44 |
83 class Error(Exception): | 45 class Error(Exception): |
84 pass | 46 pass |
85 | 47 |
86 | 48 |
87 def LastLineLength(s): | |
88 """Returns the length of the last line in s. | |
89 | |
90 Args: | |
91 s: A multi-line string, including newlines. | |
92 | |
93 Returns: | |
94 The length of the last line in s, in characters. | |
95 """ | |
96 if s.rfind('\n') == -1: return len(s) | |
97 return len(s) - s.rfind('\n') - len('\n') | |
98 | |
99 | |
100 def XmlEscape(s): | |
101 """XML-escapes the given string, replacing magic characters (&<>") with their | |
102 escaped equivalents.""" | |
103 s = s.replace("&", "&amp;").replace("<", "&lt;") | |
104 s = s.replace("\"", "&quot;").replace(">", "&gt;") | |
105 return s | |
106 | |
107 | |
108 def PrettyPrintNode(node, indent=0): | |
109 """Pretty-prints the given XML node at the given indent level. | |
110 | |
111 Args: | |
112 node: The minidom node to pretty-print. | |
113 indent: The current indent level. | |
114 | |
115 Returns: | |
116 The pretty-printed string (including embedded newlines). | |
117 | |
118 Raises: | |
119 Error if the XML has unknown tags or attributes. | |
120 """ | |
121 # Handle the top-level document node. | |
122 if node.nodeType == xml.dom.minidom.Node.DOCUMENT_NODE: | |
123 return '\n'.join([PrettyPrintNode(n) for n in node.childNodes]) | |
124 | |
125 # Handle text nodes. | |
126 if node.nodeType == xml.dom.minidom.Node.TEXT_NODE: | |
127 # Wrap each paragraph in the text to fit in the 80 column limit. | |
128 wrapper = textwrap.TextWrapper() | |
129 wrapper.initial_indent = ' ' * indent | |
130 wrapper.subsequent_indent = ' ' * indent | |
131 wrapper.break_on_hyphens = False | |
132 wrapper.break_long_words = False | |
133 wrapper.width = WRAP_COLUMN | |
134 text = XmlEscape(node.data) | |
135 # Remove any common indent. | |
136 text = textwrap.dedent(text.strip('\n')) | |
137 lines = text.split('\n') | |
138 # Split the text into paragraphs at blank line boundaries. | |
139 paragraphs = [[]] | |
140 for l in lines: | |
141 if len(l.strip()) == 0 and len(paragraphs[-1]) > 0: | |
142 paragraphs.append([]) | |
143 else: | |
144 paragraphs[-1].append(l) | |
145 # Remove trailing empty paragraph if present. | |
146 if len(paragraphs) > 0 and len(paragraphs[-1]) == 0: | |
147 paragraphs = paragraphs[:-1] | |
148 # Wrap each paragraph and separate with two newlines. | |
149 return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs]) | |
150 | |
151 # Handle element nodes. | |
152 if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE: | |
153 newlines_after_open, newlines_before_close, newlines_after_close = ( | |
154 TAGS_THAT_HAVE_EXTRA_NEWLINE.get(node.tagName, (1, 1, 0))) | |
155 # Open the tag. | |
156 s = ' ' * indent + '<' + node.tagName | |
157 | |
158 # Calculate how much space to allow for the '>' or '/>'. | |
159 closing_chars = 1 | |
160 if not node.childNodes: | |
161 closing_chars = 2 | |
162 | |
163 # Pretty-print the attributes. | |
164 attributes = node.attributes.keys() | |
165 if attributes: | |
166 # Reorder the attributes. | |
167 if not node.tagName in ATTRIBUTE_ORDER: | |
168 unrecognized_attributes = attributes; | |
169 else: | |
170 unrecognized_attributes = ( | |
171 [a for a in attributes if not a in ATTRIBUTE_ORDER[node.tagName]]) | |
172 attributes = ( | |
173 [a for a in ATTRIBUTE_ORDER[node.tagName] if a in attributes]) | |
174 | |
175 for a in unrecognized_attributes: | |
176 logging.error( | |
177 'Unrecognized attribute "%s" in tag "%s"' % (a, node.tagName)) | |
178 if unrecognized_attributes: | |
179 raise Error() | |
180 | |
181 for a in attributes: | |
182 value = XmlEscape(node.attributes[a].value) | |
183 # Replace sequences of whitespace with single spaces. | |
184 words = value.split() | |
185 a_str = ' %s="%s"' % (a, ' '.join(words)) | |
186 # Start a new line if the attribute will make this line too long. | |
187 if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN: | |
188 s += '\n' + ' ' * (indent + 3) | |
189 # Output everything up to the first quote. | |
190 s += ' %s="' % (a) | |
191 value_indent_level = LastLineLength(s) | |
192 # Output one word at a time, splitting to the next line where necessary. | |
193 column = value_indent_level | |
194 for i, word in enumerate(words): | |
195 # This is slightly too conservative since not every word will be | |
196 # followed by the closing characters... | |
197 if i > 0 and (column + len(word) + 1 + closing_chars > WRAP_COLUMN): | |
198 s = s.rstrip() # remove any trailing whitespace | |
199 s += '\n' + ' ' * value_indent_level | |
200 column = value_indent_level | |
201 s += word + ' ' | |
202 column += len(word) + 1 | |
203 s = s.rstrip() # remove any trailing whitespace | |
204 s += '"' | |
205 s = s.rstrip() # remove any trailing whitespace | |
206 | |
207 # Pretty-print the child nodes. | |
208 if node.childNodes: | |
209 s += '>' | |
210 # Calculate the new indent level for child nodes. | |
211 new_indent = indent | |
212 if node.tagName not in TAGS_THAT_DONT_INDENT: | |
213 new_indent += 2 | |
214 child_nodes = node.childNodes | |
215 | |
216 # Recursively pretty-print the child nodes. | |
217 child_nodes = [PrettyPrintNode(n, indent=new_indent) for n in child_nodes] | |
218 child_nodes = [c for c in child_nodes if len(c.strip()) > 0] | |
219 | |
220 # Determine whether we can fit the entire node on a single line. | |
221 close_tag = '</%s>' % node.tagName | |
222 space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag) | |
223 if (node.tagName in TAGS_THAT_ALLOW_SINGLE_LINE and | |
224 len(child_nodes) == 1 and len(child_nodes[0].strip()) <= space_left): | |
225 s += child_nodes[0].strip() | |
226 else: | |
227 s += '\n' * newlines_after_open + '\n'.join(child_nodes) | |
228 s += '\n' * newlines_before_close + ' ' * indent | |
229 s += close_tag | |
230 else: | |
231 s += '/>' | |
232 s += '\n' * newlines_after_close | |
233 return s | |
234 | |
235 # Handle comment nodes. | |
236 if node.nodeType == xml.dom.minidom.Node.COMMENT_NODE: | |
237 return '<!--%s-->\n' % node.data | |
238 | |
239 # Ignore other node types. This could be a processing instruction (<? ... ?>) | |
240 # or cdata section (<![CDATA[...]]>), neither of which is legal in the | |
241 # histograms XML at present. | |
242 logging.error('Ignoring unrecognized node data: %s' % node.toxml()) | |
243 raise Error() | |
244 | |
245 | |
246 def unsafeAppendChild(parent, child): | 49 def unsafeAppendChild(parent, child): |
247 """Append child to parent's list of children, ignoring the possibility that it | 50 """Append child to parent's list of children, ignoring the possibility that it |
248 is already in another node's childNodes list. Requires that the previous | 51 is already in another node's childNodes list. Requires that the previous |
249 parent of child is discarded (to avoid non-tree DOM graphs). | 52 parent of child is discarded (to avoid non-tree DOM graphs). |
250 This can provide a significant speedup as O(n^2) operations are removed (in | 53 This can provide a significant speedup as O(n^2) operations are removed (in |
251 particular, each child insertion avoids the need to traverse the old parent's | 54 particular, each child insertion avoids the need to traverse the old parent's |
252 entire list of children).""" | 55 entire list of children).""" |
253 child.parentNode = None | 56 child.parentNode = None |
254 parent.appendChild(child) | 57 parent.appendChild(child) |
255 child.parentNode = parent | 58 child.parentNode = parent |
(...skipping 41 matching lines...) |
297 | 100 |
298 # Recursively handle other element nodes and other node types. | 101 # Recursively handle other element nodes and other node types. |
299 for c in node.childNodes: TransformByAlphabetizing(c) | 102 for c in node.childNodes: TransformByAlphabetizing(c) |
300 return node | 103 return node |
301 | 104 |
302 | 105 |
303 def PrettyPrint(raw_xml): | 106 def PrettyPrint(raw_xml): |
304 """Pretty-print the given XML. | 107 """Pretty-print the given XML. |
305 | 108 |
306 Args: | 109 Args: |
307 xml: The contents of the histograms XML file, as a string. | 110 raw_xml: The contents of the histograms XML file, as a string. |
308 | 111 |
309 Returns: | 112 Returns: |
310 The pretty-printed version. | 113 The pretty-printed version. |
311 """ | 114 """ |
312 tree = xml.dom.minidom.parseString(raw_xml) | 115 tree = xml.dom.minidom.parseString(raw_xml) |
313 tree = TransformByAlphabetizing(tree) | 116 tree = TransformByAlphabetizing(tree) |
314 return PrettyPrintNode(tree) | 117 return print_style.GetPrintStyle().PrettyPrintNode(tree) |
315 | 118 |
316 | 119 |
317 def main(): | 120 def main(): |
318 logging.basicConfig(level=logging.INFO) | 121 logging.basicConfig(level=logging.INFO) |
319 | 122 |
320 presubmit = ('--presubmit' in sys.argv) | 123 presubmit = ('--presubmit' in sys.argv) |
321 | 124 |
322 histograms_filename = 'histograms.xml' | 125 histograms_filename = 'histograms.xml' |
323 histograms_backup_filename = 'histograms.before.pretty-print.xml' | 126 histograms_backup_filename = 'histograms.before.pretty-print.xml' |
324 | 127 |
(...skipping 24 matching lines...) |
349 logging.error('Aborting parsing due to fatal errors.') | 152 logging.error('Aborting parsing due to fatal errors.') |
350 sys.exit(1) | 153 sys.exit(1) |
351 | 154 |
352 if xml == pretty: | 155 if xml == pretty: |
353 logging.info('%s is correctly pretty-printed.' % histograms_filename) | 156 logging.info('%s is correctly pretty-printed.' % histograms_filename) |
354 sys.exit(0) | 157 sys.exit(0) |
355 if presubmit: | 158 if presubmit: |
356 logging.info('%s is not formatted correctly; run pretty_print.py to fix.' % | 159 logging.info('%s is not formatted correctly; run pretty_print.py to fix.' % |
357 histograms_filename) | 160 histograms_filename) |
358 sys.exit(1) | 161 sys.exit(1) |
359 if not diffutil.PromptUserToAcceptDiff( | 162 if not diff_util.PromptUserToAcceptDiff( |
360 xml, pretty, | 163 xml, pretty, |
361 'Is the prettified version acceptable?'): | 164 'Is the prettified version acceptable?'): |
362 logging.error('Aborting') | 165 logging.error('Aborting') |
363 return | 166 return |
364 | 167 |
365 logging.info('Creating backup file %s' % histograms_backup_filename) | 168 logging.info('Creating backup file %s' % histograms_backup_filename) |
366 shutil.move(histograms_pathname, histograms_backup_pathname) | 169 shutil.move(histograms_pathname, histograms_backup_pathname) |
367 | 170 |
368 logging.info('Writing new %s file' % histograms_filename) | 171 logging.info('Writing new %s file' % histograms_filename) |
369 with open(histograms_pathname, 'wb') as f: | 172 with open(histograms_pathname, 'wb') as f: |
370 f.write(pretty) | 173 f.write(pretty) |
371 | 174 |
372 | 175 |
373 if __name__ == '__main__': | 176 if __name__ == '__main__': |
374 main() | 177 main() |
OLD | NEW |