OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright 2013 The Chromium Authors. All rights reserved. | 2 # Copyright 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text | 6 """Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text |
7 at 80 chars, enforcing standard attribute ordering, and standardizing | 7 at 80 chars, enforcing standard attribute ordering, and standardizing |
8 indentation. | 8 indentation. |
9 | 9 |
10 This is quite a bit more complicated than just calling tree.toprettyxml(); | 10 This is quite a bit more complicated than just calling tree.toprettyxml(); |
11 we need additional customization, like special attribute ordering in tags | 11 we need additional customization, like special attribute ordering in tags |
12 and wrapping text nodes, so we implement our own full custom XML pretty-printer. | 12 and wrapping text nodes, so we implement our own full custom XML pretty-printer. |
13 """ | 13 """ |
14 | 14 |
15 from __future__ import with_statement | 15 from __future__ import with_statement |
16 | 16 |
| 17 import diffutil |
| 18 import json |
17 import logging | 19 import logging |
18 import os | 20 import os |
19 import shutil | 21 import shutil |
20 import sys | 22 import sys |
| 23 import textwrap |
21 import xml.dom.minidom | 24 import xml.dom.minidom |
22 | 25 |
23 import print_style | |
24 | |
25 sys.path.insert(1, os.path.join(sys.path[0], '..', '..', 'python')) | 26 sys.path.insert(1, os.path.join(sys.path[0], '..', '..', 'python')) |
26 from google import path_utils | 27 from google import path_utils |
27 | 28 |
# Column at which text nodes and attribute lists are wrapped.
WRAP_COLUMN = 80

# Desired order for tag attributes; attributes listed here will appear first,
# and in the same order as in these lists.
# { tag_name: [attribute_name, ...] }
ATTRIBUTE_ORDER = {
    'enum': ['name', 'type'],
    'histogram': ['name', 'enum', 'units'],
    'int': ['value', 'label'],
    'fieldtrial': ['name', 'separator', 'ordering'],
    'group': ['name', 'label'],
    'affected-histogram': ['name'],
    'with-group': ['name'],
}

# Tag names for top-level nodes whose children we don't want to indent.
TAGS_THAT_DONT_INDENT = [
    'histogram-configuration',
    'histograms',
    'fieldtrials',
    'enums',
]

# Extra vertical spacing rules for special tag names.
# {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)}
TAGS_THAT_HAVE_EXTRA_NEWLINE = {
    'histogram-configuration': (2, 1, 1),
    'histograms': (2, 1, 1),
    'fieldtrials': (2, 1, 1),
    'enums': (2, 1, 1),
    'histogram': (1, 1, 1),
    'enum': (1, 1, 1),
    'fieldtrial': (1, 1, 1),
}

# Tags that we allow to be squished into a single line for brevity.
TAGS_THAT_ALLOW_SINGLE_LINE = [
    'summary',
    'int',
]


def _LowercaseName(node):
    """Sort key: the node's 'name' attribute, lower-cased."""
    return node.attributes['name'].value.lower()


def _IntegerValue(node):
    """Sort key: the node's 'value' attribute, parsed as an integer."""
    return int(node.attributes['value'].value)


# Tags whose children we want to alphabetize. The key is the parent tag name,
# and the value is a pair of the tag name of the children we want to sort,
# and a key function that maps each child node to the desired sort key.
ALPHABETIZATION_RULES = {
    'histograms': ('histogram', _LowercaseName),
    'enums': ('enum', _LowercaseName),
    'enum': ('int', _IntegerValue),
    'fieldtrials': ('fieldtrial', _LowercaseName),
    'fieldtrial': ('affected-histogram', _LowercaseName),
}
43 | 81 |
44 | 82 |
class Error(Exception):
    """Raised when the XML being processed cannot be pretty-printed."""
47 | 85 |
48 | 86 |
def LastLineLength(s):
    """Returns the length of the last line in s.

    Args:
      s: A multi-line string, including newlines.

    Returns:
      The length of the last line in s, in characters. This is 0 when s is
      empty or ends with a newline.
    """
    # str.rfind returns -1 when s contains no newline, so this single
    # expression also yields len(s) for one-line strings.
    return len(s) - s.rfind('\n') - 1
| 98 |
| 99 |
def XmlEscape(s):
    """XML-escapes the given string.

    Replaces the magic characters & < > " with their entity equivalents.

    Args:
      s: The string to escape.

    Returns:
      A copy of s with the magic characters replaced by XML entities.
    """
    # '&' must be handled first; otherwise the ampersands introduced by the
    # other replacements would themselves be escaped a second time.
    s = s.replace('&', '&amp;').replace('<', '&lt;')
    s = s.replace('"', '&quot;').replace('>', '&gt;')
    return s
| 106 |
| 107 |
def PrettyPrintNode(node, indent=0):
    """Pretty-prints the given XML node at the given indent level.

    Args:
      node: The minidom node to pretty-print.
      indent: The current indent level, in spaces.

    Returns:
      The pretty-printed string (including embedded newlines).

    Raises:
      Error: if an element has an attribute that is not listed for its tag in
          ATTRIBUTE_ORDER, or if the node is of an unsupported type.
    """
    node_type = node.nodeType

    # Handle the top-level document node.
    if node_type == xml.dom.minidom.Node.DOCUMENT_NODE:
        return '\n'.join([PrettyPrintNode(n) for n in node.childNodes])

    if node_type == xml.dom.minidom.Node.TEXT_NODE:
        return _PrettyPrintTextNode(node, indent)

    if node_type == xml.dom.minidom.Node.ELEMENT_NODE:
        return _PrettyPrintElementNode(node, indent)

    # Comments are emitted verbatim; the indent level is not applied to them.
    if node_type == xml.dom.minidom.Node.COMMENT_NODE:
        return '<!--%s-->\n' % node.data

    # Reject other node types. This could be a processing instruction
    # (<? ... ?>) or cdata section (<![CDATA[...]]>), neither of which are
    # legal in the histograms XML at present.
    logging.error('Ignoring unrecognized node data: %s' % node.toxml())
    raise Error('Unrecognized node type: %s' % node_type)


def _PrettyPrintTextNode(node, indent):
    """Returns the node's text, escaped, dedented and wrapped at WRAP_COLUMN."""
    wrapper = textwrap.TextWrapper(
        width=WRAP_COLUMN,
        initial_indent=' ' * indent,
        subsequent_indent=' ' * indent,
        break_on_hyphens=False,
        break_long_words=False)
    # Remove any common indent.
    text = textwrap.dedent(XmlEscape(node.data).strip('\n'))
    # Split the text into paragraphs at blank line boundaries.
    paragraphs = [[]]
    for line in text.split('\n'):
        if not line.strip() and paragraphs[-1]:
            paragraphs.append([])
        else:
            paragraphs[-1].append(line)
    # Remove trailing empty paragraph if present.
    if paragraphs and not paragraphs[-1]:
        paragraphs = paragraphs[:-1]
    # Wrap each paragraph and separate with two newlines.
    return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])


def _OrderedAttributeNames(node):
    """Returns the element's attribute names in ATTRIBUTE_ORDER order.

    Raises Error (after logging each offender) if any attribute is not listed
    for the element's tag.
    """
    names = node.attributes.keys()
    if not names:
        return []
    # A tag without an entry has no recognized attributes at all.
    known = ATTRIBUTE_ORDER.get(node.tagName, [])
    unrecognized = [a for a in names if a not in known]
    for a in unrecognized:
        logging.error(
            'Unrecognized attribute "%s" in tag "%s"' % (a, node.tagName))
    if unrecognized:
        raise Error('Unrecognized attribute(s) %s in tag "%s"' %
                    (', '.join(unrecognized), node.tagName))
    return [a for a in known if a in names]


def _AppendAttributes(s, node, names, indent, closing_chars):
    """Returns s with the named attributes appended, wrapped at WRAP_COLUMN.

    closing_chars is the room to reserve for the '>' or '/>' that ends the tag.
    """
    for a in names:
        # Replace sequences of whitespace with single spaces.
        words = XmlEscape(node.attributes[a].value).split()
        a_str = ' %s="%s"' % (a, ' '.join(words))
        # Start a new line if the attribute will make this line too long.
        if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:
            s += '\n' + ' ' * (indent + 3)
        # Output everything up to the first quote.
        s += ' %s="' % (a)
        value_indent_level = LastLineLength(s)
        # Output one word at a time, splitting to the next line where
        # necessary; continuation lines align with the start of the value.
        column = value_indent_level
        for i, word in enumerate(words):
            # This is slightly too conservative since not every word will be
            # followed by the closing characters...
            if i > 0 and (column + len(word) + 1 + closing_chars >
                          WRAP_COLUMN):
                s = s.rstrip()  # remove any trailing whitespace
                s += '\n' + ' ' * value_indent_level
                column = value_indent_level
            s += word + ' '
            column += len(word) + 1
        s = s.rstrip()  # remove any trailing whitespace
        s += '"'
    return s.rstrip()  # remove any trailing whitespace


def _PrettyPrintElementNode(node, indent):
    """Returns the pretty-printed element, including its children."""
    newlines_after_open, newlines_before_close, newlines_after_close = (
        TAGS_THAT_HAVE_EXTRA_NEWLINE.get(node.tagName, (1, 1, 0)))
    # Open the tag.
    s = ' ' * indent + '<' + node.tagName

    # Calculate how much space to allow for the '>' or '/>'.
    closing_chars = 1 if node.childNodes else 2

    # Pretty-print the attributes.
    names = _OrderedAttributeNames(node)
    if names:
        s = _AppendAttributes(s, node, names, indent, closing_chars)

    # An element without children is self-closing.
    if not node.childNodes:
        return s + '/>' + '\n' * newlines_after_close

    s += '>'
    # Calculate the new indent level for child nodes.
    new_indent = indent
    if node.tagName not in TAGS_THAT_DONT_INDENT:
        new_indent += 2

    # Recursively pretty-print the child nodes, dropping the ones that render
    # as whitespace only.
    children = [PrettyPrintNode(n, indent=new_indent) for n in node.childNodes]
    children = [c for c in children if c.strip()]

    # Determine whether we can fit the entire node on a single line.
    close_tag = '</%s>' % node.tagName
    space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)
    if (node.tagName in TAGS_THAT_ALLOW_SINGLE_LINE and
            len(children) == 1 and len(children[0].strip()) <= space_left):
        s += children[0].strip()
    else:
        s += '\n' * newlines_after_open + '\n'.join(children)
        s += '\n' * newlines_before_close + ' ' * indent
    s += close_tag
    s += '\n' * newlines_after_close
    return s
| 244 |
| 245 |
def unsafeAppendChild(parent, child):
    """Appends child to parent without detaching it from its previous parent.

    The possibility that child is already in another node's childNodes list is
    ignored, so the caller must discard child's previous parent (to avoid
    non-tree DOM graphs).

    This can provide a significant speedup as O(n^2) operations are removed
    (in particular, each child insertion avoids the need to traverse the old
    parent's entire list of children).

    Args:
      parent: The node that child is appended to.
      child: The node to append.
    """
    # Clear the parent link before appending so the old parent's child list
    # is left alone.
    child.parentNode = None
    parent.appendChild(child)
    # Make sure the child ends up pointing at its new parent.
    child.parentNode = parent
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
100 | 297 |
101 # Recursively handle other element nodes and other node types. | 298 # Recursively handle other element nodes and other node types. |
102 for c in node.childNodes: TransformByAlphabetizing(c) | 299 for c in node.childNodes: TransformByAlphabetizing(c) |
103 return node | 300 return node |
104 | 301 |
105 | 302 |
def PrettyPrint(raw_xml):
    """Pretty-print the given XML.

    The XML is parsed, its children are reordered by
    TransformByAlphabetizing, and the resulting tree is rendered with
    PrettyPrintNode.

    Args:
      raw_xml: The contents of the histograms XML file, as a string.

    Returns:
      The pretty-printed version.

    Raises:
      Error: propagated from PrettyPrintNode when the XML contains
          unrecognized attributes or unsupported node types.
    """
    tree = xml.dom.minidom.parseString(raw_xml)
    tree = TransformByAlphabetizing(tree)
    return PrettyPrintNode(tree)
118 | 315 |
119 | 316 |
120 def main(): | 317 def main(): |
121 logging.basicConfig(level=logging.INFO) | 318 logging.basicConfig(level=logging.INFO) |
122 | 319 |
123 presubmit = ('--presubmit' in sys.argv) | 320 presubmit = ('--presubmit' in sys.argv) |
124 | 321 |
125 histograms_filename = 'histograms.xml' | 322 histograms_filename = 'histograms.xml' |
126 histograms_backup_filename = 'histograms.before.pretty-print.xml' | 323 histograms_backup_filename = 'histograms.before.pretty-print.xml' |
127 | 324 |
(...skipping 24 matching lines...) Expand all Loading... |
152 logging.error('Aborting parsing due to fatal errors.') | 349 logging.error('Aborting parsing due to fatal errors.') |
153 sys.exit(1) | 350 sys.exit(1) |
154 | 351 |
155 if xml == pretty: | 352 if xml == pretty: |
156 logging.info('%s is correctly pretty-printed.' % histograms_filename) | 353 logging.info('%s is correctly pretty-printed.' % histograms_filename) |
157 sys.exit(0) | 354 sys.exit(0) |
158 if presubmit: | 355 if presubmit: |
159 logging.info('%s is not formatted correctly; run pretty_print.py to fix.' % | 356 logging.info('%s is not formatted correctly; run pretty_print.py to fix.' % |
160 histograms_filename) | 357 histograms_filename) |
161 sys.exit(1) | 358 sys.exit(1) |
162 if not diff_util.PromptUserToAcceptDiff( | 359 if not diffutil.PromptUserToAcceptDiff( |
163 xml, pretty, | 360 xml, pretty, |
164 'Is the prettified version acceptable?'): | 361 'Is the prettified version acceptable?'): |
165 logging.error('Aborting') | 362 logging.error('Aborting') |
166 return | 363 return |
167 | 364 |
168 logging.info('Creating backup file %s' % histograms_backup_filename) | 365 logging.info('Creating backup file %s' % histograms_backup_filename) |
169 shutil.move(histograms_pathname, histograms_backup_pathname) | 366 shutil.move(histograms_pathname, histograms_backup_pathname) |
170 | 367 |
171 logging.info('Writing new %s file' % histograms_filename) | 368 logging.info('Writing new %s file' % histograms_filename) |
172 with open(histograms_pathname, 'wb') as f: | 369 with open(histograms_pathname, 'wb') as f: |
173 f.write(pretty) | 370 f.write(pretty) |
174 | 371 |
175 | 372 |
176 if __name__ == '__main__': | 373 if __name__ == '__main__': |
177 main() | 374 main() |
OLD | NEW |