Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 5 | 4 |
| 6 """Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text | 5 """Utility file for pretty print xml file.""" |
|
Alexei Svitkine (slow)
2014/02/03 19:37:34
This description should be more clear, otherwise i
yao
2014/02/04 19:08:12
Done.
| |
| 7 at 80 chars, enforcing standard attribute ordering, and standardizing | |
| 8 indentation. | |
| 9 | 6 |
| 10 This is quite a bit more complicated than just calling tree.toprettyxml(); | |
| 11 we need additional customization, like special attribute ordering in tags | |
| 12 and wrapping text nodes, so we implement our own full custom XML pretty-printer. | |
| 13 """ | |
| 14 | |
| 15 from __future__ import with_statement | |
| 16 | |
| 17 import diffutil | |
| 18 import json | |
| 19 import logging | 7 import logging |
| 20 import os | |
| 21 import shutil | |
| 22 import sys | |
| 23 import textwrap | 8 import textwrap |
| 24 import xml.dom.minidom | 9 import xml.dom.minidom |
| 25 | 10 |
| 26 sys.path.insert(1, os.path.join(sys.path[0], '..', '..', 'python')) | |
| 27 from google import path_utils | |
| 28 | |
| 29 WRAP_COLUMN = 80 | 11 WRAP_COLUMN = 80 |
| 30 | 12 |
| 31 # Desired order for tag attributes; attributes listed here will appear first, | |
| 32 # and in the same order as in these lists. | |
| 33 # { tag_name: [attribute_name, ...] } | |
| 34 ATTRIBUTE_ORDER = { | |
| 35 'enum': ['name', 'type'], | |
| 36 'histogram': ['name', 'enum', 'units'], | |
| 37 'int': ['value', 'label'], | |
| 38 'fieldtrial': ['name', 'separator', 'ordering'], | |
| 39 'group': ['name', 'label'], | |
| 40 'affected-histogram': ['name'], | |
| 41 'with-group': ['name'], | |
| 42 } | |
| 43 | |
| 44 # Tag names for top-level nodes whose children we don't want to indent. | |
| 45 TAGS_THAT_DONT_INDENT = [ | |
| 46 'histogram-configuration', | |
| 47 'histograms', | |
| 48 'fieldtrials', | |
| 49 'enums' | |
| 50 ] | |
| 51 | |
| 52 # Extra vertical spacing rules for special tag names. | |
| 53 # {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)} | |
| 54 TAGS_THAT_HAVE_EXTRA_NEWLINE = { | |
| 55 'histogram-configuration': (2, 1, 1), | |
| 56 'histograms': (2, 1, 1), | |
| 57 'fieldtrials': (2, 1, 1), | |
| 58 'enums': (2, 1, 1), | |
| 59 'histogram': (1, 1, 1), | |
| 60 'enum': (1, 1, 1), | |
| 61 'fieldtrial': (1, 1, 1), | |
| 62 } | |
| 63 | |
| 64 # Tags that we allow to be squished into a single line for brevity. | |
| 65 TAGS_THAT_ALLOW_SINGLE_LINE = [ | |
| 66 'summary', | |
| 67 'int', | |
| 68 ] | |
| 69 | |
| 70 # Tags whose children we want to alphabetize. The key is the parent tag name, | |
| 71 # and the value is a pair of the tag name of the children we want to sort, | |
| 72 # and a key function that maps each child node to the desired sort key. | |
| 73 ALPHABETIZATION_RULES = { | |
| 74 'histograms': ('histogram', lambda n: n.attributes['name'].value.lower()), | |
| 75 'enums': ('enum', lambda n: n.attributes['name'].value.lower()), | |
| 76 'enum': ('int', lambda n: int(n.attributes['value'].value)), | |
| 77 'fieldtrials': ('fieldtrial', lambda n: n.attributes['name'].value.lower()), | |
| 78 'fieldtrial': ('affected-histogram', | |
| 79 lambda n: n.attributes['name'].value.lower()), | |
| 80 } | |
| 81 | |
| 82 | 13 |
| 83 class Error(Exception): | 14 class Error(Exception): |
| 84 pass | 15 pass |
| 85 | 16 |
| 86 | 17 |
| 87 def LastLineLength(s): | 18 def LastLineLength(s): |
| 88 """Returns the length of the last line in s. | 19 """Returns the length of the last line in s. |
| 89 | 20 |
| 90 Args: | 21 Args: |
| 91 s: A multi-line string, including newlines. | 22 s: A multi-line string, including newlines. |
| 92 | 23 |
| 93 Returns: | 24 Returns: |
| 94 The length of the last line in s, in characters. | 25 The length of the last line in s, in characters. |
| 95 """ | 26 """ |
| 96 if s.rfind('\n') == -1: return len(s) | 27 if s.rfind('\n') == -1: return len(s) |
| 97 return len(s) - s.rfind('\n') - len('\n') | 28 return len(s) - s.rfind('\n') - len('\n') |
| 98 | 29 |
| 99 | 30 |
| 100 def XmlEscape(s): | 31 def XmlEscape(s): |
| 101 """XML-escapes the given string, replacing magic characters (&<>") with their | 32 """XML-escapes the given string, replacing magic characters (&<>") with their |
| 102 escaped equivalents.""" | 33 escaped equivalents.""" |
| 103 s = s.replace("&", "&amp;").replace("<", "&lt;") | 34 s = s.replace("&", "&amp;").replace("<", "&lt;") |
| 104 s = s.replace("\"", "&quot;").replace(">", "&gt;") | 35 s = s.replace("\"", "&quot;").replace(">", "&gt;") |
| 105 return s | 36 return s |
| 106 | 37 |
| 107 | 38 |
| 108 def PrettyPrintNode(node, indent=0): | 39 class XmlStyle(object): |
| 40 """A class that stores all style specification for an output xml file.""" | |
| 41 | |
| 42 def __init__(self, attribute_order, tags_that_have_extra_newline, | |
| 43 tags_that_dont_indent, tags_that_allow_single_line): | |
| 44 # List of tag names for top-level nodes whose children are not indented. | |
| 45 self.attribute_order = attribute_order | |
| 46 self.tags_that_have_extra_newline = tags_that_have_extra_newline | |
| 47 self.tags_that_dont_indent = tags_that_dont_indent | |
| 48 self.tags_that_allow_single_line = tags_that_allow_single_line | |
| 49 | |
| 50 | |
| 51 def PrettyPrintNode(node, xml_style, indent=0): | |
|
Alexei Svitkine (slow)
2014/02/03 19:37:34
Now that you have the XmlStyle object, can you mak
yao
2014/02/04 19:08:12
Done.
| |
| 109 """Pretty-prints the given XML node at the given indent level. | 52 """Pretty-prints the given XML node at the given indent level. |
| 110 | 53 |
| 111 Args: | 54 Args: |
| 112 node: The minidom node to pretty-print. | 55 node: The minidom node to pretty-print. |
| 56 xml_style: An XmlStyle object that represents the style requirement of the | |
| 57 output xml file. | |
| 113 indent: The current indent level. | 58 indent: The current indent level. |
| 114 | 59 |
| 115 Returns: | 60 Returns: |
| 116 The pretty-printed string (including embedded newlines). | 61 The pretty-printed string (including embedded newlines). |
| 117 | 62 |
| 118 Raises: | 63 Raises: |
| 119 Error if the XML has unknown tags or attributes. | 64 Error if the XML has unknown tags or attributes. |
| 120 """ | 65 """ |
| 121 # Handle the top-level document node. | 66 # Handle the top-level document node. |
| 122 if node.nodeType == xml.dom.minidom.Node.DOCUMENT_NODE: | 67 if node.nodeType == xml.dom.minidom.Node.DOCUMENT_NODE: |
| 123 return '\n'.join([PrettyPrintNode(n) for n in node.childNodes]) | 68 return '\n'.join([PrettyPrintNode(n, xml_style) for n in node.childNodes]) |
| 124 | 69 |
| 125 # Handle text nodes. | 70 # Handle text nodes. |
| 126 if node.nodeType == xml.dom.minidom.Node.TEXT_NODE: | 71 if node.nodeType == xml.dom.minidom.Node.TEXT_NODE: |
| 127 # Wrap each paragraph in the text to fit in the 80 column limit. | 72 # Wrap each paragraph in the text to fit in the 80 column limit. |
| 128 wrapper = textwrap.TextWrapper() | 73 wrapper = textwrap.TextWrapper() |
| 129 wrapper.initial_indent = ' ' * indent | 74 wrapper.initial_indent = ' ' * indent |
| 130 wrapper.subsequent_indent = ' ' * indent | 75 wrapper.subsequent_indent = ' ' * indent |
| 131 wrapper.break_on_hyphens = False | 76 wrapper.break_on_hyphens = False |
| 132 wrapper.break_long_words = False | 77 wrapper.break_long_words = False |
| 133 wrapper.width = WRAP_COLUMN | 78 wrapper.width = WRAP_COLUMN |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 144 paragraphs[-1].append(l) | 89 paragraphs[-1].append(l) |
| 145 # Remove trailing empty paragraph if present. | 90 # Remove trailing empty paragraph if present. |
| 146 if len(paragraphs) > 0 and len(paragraphs[-1]) == 0: | 91 if len(paragraphs) > 0 and len(paragraphs[-1]) == 0: |
| 147 paragraphs = paragraphs[:-1] | 92 paragraphs = paragraphs[:-1] |
| 148 # Wrap each paragraph and separate with two newlines. | 93 # Wrap each paragraph and separate with two newlines. |
| 149 return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs]) | 94 return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs]) |
| 150 | 95 |
| 151 # Handle element nodes. | 96 # Handle element nodes. |
| 152 if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE: | 97 if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE: |
| 153 newlines_after_open, newlines_before_close, newlines_after_close = ( | 98 newlines_after_open, newlines_before_close, newlines_after_close = ( |
| 154 TAGS_THAT_HAVE_EXTRA_NEWLINE.get(node.tagName, (1, 1, 0))) | 99 xml_style.tags_that_have_extra_newline.get(node.tagName, (1, 1, 0))) |
| 155 # Open the tag. | 100 # Open the tag. |
| 156 s = ' ' * indent + '<' + node.tagName | 101 s = ' ' * indent + '<' + node.tagName |
| 157 | 102 |
| 158 # Calculate how much space to allow for the '>' or '/>'. | 103 # Calculate how much space to allow for the '>' or '/>'. |
| 159 closing_chars = 1 | 104 closing_chars = 1 |
| 160 if not node.childNodes: | 105 if not node.childNodes: |
| 161 closing_chars = 2 | 106 closing_chars = 2 |
| 162 | 107 |
| 163 # Pretty-print the attributes. | 108 # Pretty-print the attributes. |
| 164 attributes = node.attributes.keys() | 109 attributes = node.attributes.keys() |
| 165 if attributes: | 110 if attributes: |
| 166 # Reorder the attributes. | 111 # Reorder the attributes. |
| 167 if not node.tagName in ATTRIBUTE_ORDER: | 112 if node.tagName not in xml_style.attribute_order: |
| 168 unrecognized_attributes = attributes; | 113 unrecognized_attributes = attributes |
| 169 else: | 114 else: |
| 170 unrecognized_attributes = ( | 115 unrecognized_attributes = ( |
| 171 [a for a in attributes if not a in ATTRIBUTE_ORDER[node.tagName]]) | 116 [a for a in attributes |
| 172 attributes = ( | 117 if a not in xml_style.attribute_order[node.tagName]]) |
| 173 [a for a in ATTRIBUTE_ORDER[node.tagName] if a in attributes]) | 118 attributes = [a for a in xml_style.attribute_order[node.tagName] |
| 119 if a in attributes] | |
| 174 | 120 |
| 175 for a in unrecognized_attributes: | 121 for a in unrecognized_attributes: |
| 176 logging.error( | 122 logging.error( |
| 177 'Unrecognized attribute "%s" in tag "%s"' % (a, node.tagName)) | 123 'Unrecognized attribute "%s" in tag "%s"' % (a, node.tagName)) |
| 178 if unrecognized_attributes: | 124 if unrecognized_attributes: |
| 179 raise Error() | 125 raise Error() |
| 180 | 126 |
| 181 for a in attributes: | 127 for a in attributes: |
| 182 value = XmlEscape(node.attributes[a].value) | 128 value = XmlEscape(node.attributes[a].value) |
| 183 # Replace sequences of whitespace with single spaces. | 129 # Replace sequences of whitespace with single spaces. |
| (...skipping 18 matching lines...) Expand all Loading... | |
| 202 column += len(word) + 1 | 148 column += len(word) + 1 |
| 203 s = s.rstrip() # remove any trailing whitespace | 149 s = s.rstrip() # remove any trailing whitespace |
| 204 s += '"' | 150 s += '"' |
| 205 s = s.rstrip() # remove any trailing whitespace | 151 s = s.rstrip() # remove any trailing whitespace |
| 206 | 152 |
| 207 # Pretty-print the child nodes. | 153 # Pretty-print the child nodes. |
| 208 if node.childNodes: | 154 if node.childNodes: |
| 209 s += '>' | 155 s += '>' |
| 210 # Calculate the new indent level for child nodes. | 156 # Calculate the new indent level for child nodes. |
| 211 new_indent = indent | 157 new_indent = indent |
| 212 if node.tagName not in TAGS_THAT_DONT_INDENT: | 158 if node.tagName not in xml_style.tags_that_dont_indent: |
| 213 new_indent += 2 | 159 new_indent += 2 |
| 214 child_nodes = node.childNodes | 160 child_nodes = node.childNodes |
| 215 | 161 |
| 216 # Recursively pretty-print the child nodes. | 162 # Recursively pretty-print the child nodes. |
| 217 child_nodes = [PrettyPrintNode(n, indent=new_indent) for n in child_nodes] | 163 child_nodes = [PrettyPrintNode(n, xml_style, indent=new_indent) |
| 164 for n in child_nodes] | |
| 218 child_nodes = [c for c in child_nodes if len(c.strip()) > 0] | 165 child_nodes = [c for c in child_nodes if len(c.strip()) > 0] |
| 219 | 166 |
| 220 # Determine whether we can fit the entire node on a single line. | 167 # Determine whether we can fit the entire node on a single line. |
| 221 close_tag = '</%s>' % node.tagName | 168 close_tag = '</%s>' % node.tagName |
| 222 space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag) | 169 space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag) |
| 223 if (node.tagName in TAGS_THAT_ALLOW_SINGLE_LINE and | 170 if (node.tagName in xml_style.tags_that_allow_single_line and |
| 224 len(child_nodes) == 1 and len(child_nodes[0].strip()) <= space_left): | 171 len(child_nodes) == 1 and len(child_nodes[0].strip()) <= space_left): |
| 225 s += child_nodes[0].strip() | 172 s += child_nodes[0].strip() |
| 226 else: | 173 else: |
| 227 s += '\n' * newlines_after_open + '\n'.join(child_nodes) | 174 s += '\n' * newlines_after_open + '\n'.join(child_nodes) |
| 228 s += '\n' * newlines_before_close + ' ' * indent | 175 s += '\n' * newlines_before_close + ' ' * indent |
| 229 s += close_tag | 176 s += close_tag |
| 230 else: | 177 else: |
| 231 s += '/>' | 178 s += '/>' |
| 232 s += '\n' * newlines_after_close | 179 s += '\n' * newlines_after_close |
| 233 return s | 180 return s |
| 234 | 181 |
| 235 # Handle comment nodes. | 182 # Handle comment nodes. |
| 236 if node.nodeType == xml.dom.minidom.Node.COMMENT_NODE: | 183 if node.nodeType == xml.dom.minidom.Node.COMMENT_NODE: |
| 237 return '<!--%s-->\n' % node.data | 184 return '<!--%s-->\n' % node.data |
| 238 | 185 |
| 239 # Ignore other node types. This could be a processing instruction (<? ... ?>) | 186 # Ignore other node types. This could be a processing instruction (<? ... ?>) |
| 240 # or cdata section (<![CDATA[...]]!>), neither of which are legal in the | 187 # or cdata section (<![CDATA[...]]!>), neither of which are legal in the |
| 241 # histograms XML at present. | 188 # histograms XML at present. |
| 242 logging.error('Ignoring unrecognized node data: %s' % node.toxml()) | 189 logging.error('Ignoring unrecognized node data: %s' % node.toxml()) |
| 243 raise Error() | 190 raise Error() |
| 244 | |
| 245 | |
| 246 def unsafeAppendChild(parent, child): | |
| 247 """Append child to parent's list of children, ignoring the possibility that it | |
| 248 is already in another node's childNodes list. Requires that the previous | |
| 249 parent of child is discarded (to avoid non-tree DOM graphs). | |
| 250 This can provide a significant speedup as O(n^2) operations are removed (in | |
| 251 particular, each child insertion avoids the need to traverse the old parent's | |
| 252 entire list of children).""" | |
| 253 child.parentNode = None | |
| 254 parent.appendChild(child) | |
| 255 child.parentNode = parent | |
| 256 | |
| 257 | |
| 258 def TransformByAlphabetizing(node): | |
| 259 """Transform the given XML by alphabetizing specific node types according to | |
| 260 the rules in ALPHABETIZATION_RULES. | |
| 261 | |
| 262 Args: | |
| 263 node: The minidom node to transform. | |
| 264 | |
| 265 Returns: | |
| 266 The minidom node, with children appropriately alphabetized. Note that the | |
| 267 transformation is done in-place, i.e. the original minidom tree is modified | |
| 268 directly. | |
| 269 """ | |
| 270 if node.nodeType != xml.dom.minidom.Node.ELEMENT_NODE: | |
| 271 for c in node.childNodes: TransformByAlphabetizing(c) | |
| 272 return node | |
| 273 | |
| 274 # Element node with a tag name that we alphabetize the children of? | |
| 275 if node.tagName in ALPHABETIZATION_RULES: | |
| 276 # Put subnodes in a list of node,key pairs to allow for custom sorting. | |
| 277 subtag, key_function = ALPHABETIZATION_RULES[node.tagName] | |
| 278 subnodes = [] | |
| 279 last_key = -1 | |
| 280 for c in node.childNodes: | |
| 281 if (c.nodeType == xml.dom.minidom.Node.ELEMENT_NODE and | |
| 282 c.tagName == subtag): | |
| 283 last_key = key_function(c) | |
| 284 # Subnodes that we don't want to rearrange use the last node's key, | |
| 285 # so they stay in the same relative position. | |
| 286 subnodes.append( (c, last_key) ) | |
| 287 | |
| 288 # Sort the subnode list. | |
| 289 subnodes.sort(key=lambda pair: pair[1]) | |
| 290 | |
| 291 # Re-add the subnodes, transforming each recursively. | |
| 292 while node.firstChild: | |
| 293 node.removeChild(node.firstChild) | |
| 294 for (c, _) in subnodes: | |
| 295 unsafeAppendChild(node, TransformByAlphabetizing(c)) | |
| 296 return node | |
| 297 | |
| 298 # Recursively handle other element nodes and other node types. | |
| 299 for c in node.childNodes: TransformByAlphabetizing(c) | |
| 300 return node | |
| 301 | |
| 302 | |
| 303 def PrettyPrint(raw_xml): | |
| 304 """Pretty-print the given XML. | |
| 305 | |
| 306 Args: | |
| 307 xml: The contents of the histograms XML file, as a string. | |
| 308 | |
| 309 Returns: | |
| 310 The pretty-printed version. | |
| 311 """ | |
| 312 tree = xml.dom.minidom.parseString(raw_xml) | |
| 313 tree = TransformByAlphabetizing(tree) | |
| 314 return PrettyPrintNode(tree) | |
| 315 | |
| 316 | |
| 317 def main(): | |
| 318 logging.basicConfig(level=logging.INFO) | |
| 319 | |
| 320 presubmit = ('--presubmit' in sys.argv) | |
| 321 | |
| 322 histograms_filename = 'histograms.xml' | |
| 323 histograms_backup_filename = 'histograms.before.pretty-print.xml' | |
| 324 | |
| 325 script_dir = path_utils.ScriptDir() | |
| 326 | |
| 327 histograms_pathname = os.path.join(script_dir, histograms_filename) | |
| 328 histograms_backup_pathname = os.path.join(script_dir, | |
| 329 histograms_backup_filename) | |
| 330 | |
| 331 logging.info('Loading %s...' % histograms_filename) | |
| 332 with open(histograms_pathname, 'rb') as f: | |
| 333 xml = f.read() | |
| 334 | |
| 335 # Check there are no CR ('\r') characters in the file. | |
| 336 if '\r' in xml: | |
| 337 logging.info('DOS-style line endings (CR characters) detected - these are ' | |
| 338 'not allowed. Please run dos2unix %s' % histograms_filename) | |
| 339 sys.exit(1) | |
| 340 | |
| 341 logging.info('Pretty-printing...') | |
| 342 try: | |
| 343 pretty = PrettyPrint(xml) | |
| 344 except Error: | |
| 345 logging.error('Aborting parsing due to fatal errors.') | |
| 346 sys.exit(1) | |
| 347 | |
| 348 if xml == pretty: | |
| 349 logging.info('%s is correctly pretty-printed.' % histograms_filename) | |
| 350 sys.exit(0) | |
| 351 if presubmit: | |
| 352 logging.info('%s is not formatted correctly; run pretty_print.py to fix.' % | |
| 353 histograms_filename) | |
| 354 sys.exit(1) | |
| 355 if not diffutil.PromptUserToAcceptDiff( | |
| 356 xml, pretty, | |
| 357 'Is the prettified version acceptable?'): | |
| 358 logging.error('Aborting') | |
| 359 return | |
| 360 | |
| 361 logging.info('Creating backup file %s' % histograms_backup_filename) | |
| 362 shutil.move(histograms_pathname, histograms_backup_pathname) | |
| 363 | |
| 364 logging.info('Writing new %s file' % histograms_filename) | |
| 365 with open(histograms_pathname, 'wb') as f: | |
| 366 f.write(pretty) | |
| 367 | |
| 368 | |
| 369 if __name__ == '__main__': | |
| 370 main() | |
| OLD | NEW |